/*
 * Copyright © 2016 Red Hat
 * based on intel anv code:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_meta.h"
#include "radv_printf.h"

#include "vk_common_entrypoints.h"
#include "vk_pipeline_cache.h"
#include "vk_util.h"

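/*
 * Meta operations (clears, copies, blits, resolves, ...) must not be counted
 * by application queries, so radv_meta_save()/radv_meta_restore() suspend any
 * active queries before the meta operation and resume them afterwards.
 */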
static void
radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer)
{
   const uint32_t num_pipeline_stat_queries = radv_get_num_pipeline_stat_queries(cmd_buffer);

   if (num_pipeline_stat_queries > 0) {
      cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_START_PIPELINE_STATS;
      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_STOP_PIPELINE_STATS;
   }

   /* Pipeline statistics queries. */
   if (cmd_buffer->state.active_pipeline_queries > 0) {
      state->active_emulated_pipeline_queries = cmd_buffer->state.active_emulated_pipeline_queries;
      cmd_buffer->state.active_emulated_pipeline_queries = 0;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
   }

   /* Occlusion queries. */
   if (cmd_buffer->state.active_occlusion_queries) {
      state->active_occlusion_queries = cmd_buffer->state.active_occlusion_queries;
      cmd_buffer->state.active_occlusion_queries = 0;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY;
   }

   /* Primitives generated queries (legacy). */
   if (cmd_buffer->state.active_prims_gen_queries) {
      cmd_buffer->state.suspend_streamout = true;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE;
   }

   /* Primitives generated queries (NGG). */
   if (cmd_buffer->state.active_emulated_prims_gen_queries) {
      state->active_emulated_prims_gen_queries = cmd_buffer->state.active_emulated_prims_gen_queries;
      cmd_buffer->state.active_emulated_prims_gen_queries = 0;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
   }

   /* Transform feedback queries (NGG). */
   if (cmd_buffer->state.active_emulated_prims_xfb_queries) {
      state->active_emulated_prims_xfb_queries = cmd_buffer->state.active_emulated_prims_xfb_queries;
      cmd_buffer->state.active_emulated_prims_xfb_queries = 0;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
   }
}

static void
radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer)
{
   const uint32_t num_pipeline_stat_queries = radv_get_num_pipeline_stat_queries(cmd_buffer);

   if (num_pipeline_stat_queries > 0) {
      cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_STOP_PIPELINE_STATS;
      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS;
   }

   /* Pipeline statistics queries. */
   if (cmd_buffer->state.active_pipeline_queries > 0) {
      cmd_buffer->state.active_emulated_pipeline_queries = state->active_emulated_pipeline_queries;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
   }

   /* Occlusion queries. */
   if (state->active_occlusion_queries) {
      cmd_buffer->state.active_occlusion_queries = state->active_occlusion_queries;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY;
   }

   /* Primitives generated queries (legacy). */
   if (cmd_buffer->state.active_prims_gen_queries) {
      cmd_buffer->state.suspend_streamout = false;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE;
   }

   /* Primitives generated queries (NGG). */
   if (state->active_emulated_prims_gen_queries) {
      cmd_buffer->state.active_emulated_prims_gen_queries = state->active_emulated_prims_gen_queries;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
   }

   /* Transform feedback queries (NGG). */
   if (state->active_emulated_prims_xfb_queries) {
      cmd_buffer->state.active_emulated_prims_xfb_queries = state->active_emulated_prims_xfb_queries;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
   }
}

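/*
 * Typical usage around a meta operation (illustrative sketch only; the exact
 * flag set depends on what the meta operation clobbers):
 *
 *    struct radv_meta_saved_state saved_state;
 *
 *    radv_meta_save(&saved_state, cmd_buffer,
 *                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS |
 *                      RADV_META_SAVE_DESCRIPTORS);
 *
 *    ... bind the meta pipeline, set its state, and emit the dispatch/draw ...
 *
 *    radv_meta_restore(&saved_state, cmd_buffer);
 */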
void
radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer, uint32_t flags)
{
   VkPipelineBindPoint bind_point =
      flags & RADV_META_SAVE_GRAPHICS_PIPELINE ? VK_PIPELINE_BIND_POINT_GRAPHICS : VK_PIPELINE_BIND_POINT_COMPUTE;
   struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);

   assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_COMPUTE_PIPELINE));

   state->flags = flags;
   state->active_occlusion_queries = 0;
   state->active_emulated_prims_gen_queries = 0;
   state->active_emulated_prims_xfb_queries = 0;

   if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
      assert(!(state->flags & RADV_META_SAVE_COMPUTE_PIPELINE));

      state->old_graphics_pipeline = cmd_buffer->state.graphics_pipeline;

      /* Save all dynamic states. */
      state->dynamic = cmd_buffer->state.dynamic;
   }

   if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
      assert(!(state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE));

      state->old_compute_pipeline = cmd_buffer->state.compute_pipeline;
   }

   for (unsigned i = 0; i <= MESA_SHADER_MESH; i++) {
      state->old_shader_objs[i] = cmd_buffer->state.shader_objs[i];
   }

   if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
      state->old_descriptor_set0 = descriptors_state->sets[0];
      if (!(descriptors_state->valid & 1))
         state->flags &= ~RADV_META_SAVE_DESCRIPTORS;
   }

   if (state->flags & RADV_META_SAVE_CONSTANTS) {
      memcpy(state->push_constants, cmd_buffer->push_constants, MAX_PUSH_CONSTANTS_SIZE);
   }

   if (state->flags & RADV_META_SAVE_RENDER) {
      state->render = cmd_buffer->state.render;
      radv_cmd_buffer_reset_rendering(cmd_buffer);
   }

   if (state->flags & RADV_META_SUSPEND_PREDICATING) {
      state->predicating = cmd_buffer->state.predicating;
      cmd_buffer->state.predicating = false;
   }

   radv_suspend_queries(state, cmd_buffer);
}

void
radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer)
{
   VkPipelineBindPoint bind_point = state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE ? VK_PIPELINE_BIND_POINT_GRAPHICS
                                                                                    : VK_PIPELINE_BIND_POINT_COMPUTE;

   if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
      if (state->old_graphics_pipeline) {
         radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
                              radv_pipeline_to_handle(&state->old_graphics_pipeline->base));
      }

      /* Restore all dynamic states. */
      cmd_buffer->state.dynamic = state->dynamic;
      cmd_buffer->state.dirty_dynamic |= RADV_DYNAMIC_ALL;

      /* Re-emit the guardband state because meta operations changed dynamic states. */
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_GUARDBAND;
   }

   if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
      if (state->old_compute_pipeline) {
         radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                              radv_pipeline_to_handle(&state->old_compute_pipeline->base));
      }
   }

   VkShaderEXT shaders[MESA_SHADER_MESH + 1];
   VkShaderStageFlagBits stages[MESA_SHADER_MESH + 1];
   uint32_t stage_count = 0;

   for (unsigned i = 0; i <= MESA_SHADER_MESH; i++) {
      if (state->old_shader_objs[i]) {
         stages[stage_count] = mesa_to_vk_shader_stage(i);
         shaders[stage_count] = radv_shader_object_to_handle(state->old_shader_objs[i]);
         stage_count++;
      }
   }

   if (stage_count > 0) {
      radv_CmdBindShadersEXT(radv_cmd_buffer_to_handle(cmd_buffer), stage_count, stages, shaders);
   }

   if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
      radv_set_descriptor_set(cmd_buffer, bind_point, state->old_descriptor_set0, 0);
   }

   if (state->flags & RADV_META_SAVE_CONSTANTS) {
      VkShaderStageFlags stage_flags = VK_SHADER_STAGE_COMPUTE_BIT;

      if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE)
         stage_flags |= VK_SHADER_STAGE_ALL_GRAPHICS;

      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), VK_NULL_HANDLE, stage_flags, 0,
                                 MAX_PUSH_CONSTANTS_SIZE, state->push_constants);
   }

   if (state->flags & RADV_META_SAVE_RENDER) {
      cmd_buffer->state.render = state->render;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
   }

   if (state->flags & RADV_META_SUSPEND_PREDICATING)
      cmd_buffer->state.predicating = state->predicating;

   radv_resume_queries(state, cmd_buffer);
}

VkImageViewType
radv_meta_get_view_type(const struct radv_image *image)
{
   switch (image->vk.image_type) {
   case VK_IMAGE_TYPE_1D:
      return VK_IMAGE_VIEW_TYPE_1D;
   case VK_IMAGE_TYPE_2D:
      return VK_IMAGE_VIEW_TYPE_2D;
   case VK_IMAGE_TYPE_3D:
      return VK_IMAGE_VIEW_TYPE_3D;
   default:
      unreachable("bad VkImageViewType");
   }
}

/**
 * When creating a destination VkImageView, this function provides the needed
 * VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
 */
uint32_t
radv_meta_get_iview_layer(const struct radv_image *dst_image, const VkImageSubresourceLayers *dst_subresource,
                          const VkOffset3D *dst_offset)
{
   switch (dst_image->vk.image_type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      return dst_subresource->baseArrayLayer;
   case VK_IMAGE_TYPE_3D:
      /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
       * but meta does it anyway. When doing so, we translate the
       * destination's z offset into an array offset.
       */
      return dst_offset->z;
   default:
      assert(!"bad VkImageType");
      return 0;
   }
}

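/* Allocation callbacks used for meta objects: they forward to the device's
 * allocator and always use device-lifetime scope, ignoring the requested
 * allocationScope.
 */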
static VKAPI_ATTR void *VKAPI_CALL
meta_alloc(void *_device, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
{
   struct radv_device *device = _device;
   return device->vk.alloc.pfnAllocation(device->vk.alloc.pUserData, size, alignment,
                                         VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static VKAPI_ATTR void *VKAPI_CALL
meta_realloc(void *_device, void *original, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
{
   struct radv_device *device = _device;
   return device->vk.alloc.pfnReallocation(device->vk.alloc.pUserData, original, size, alignment,
                                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static VKAPI_ATTR void VKAPI_CALL
meta_free(void *_device, void *data)
{
   struct radv_device *device = _device;
   device->vk.alloc.pfnFree(device->vk.alloc.pUserData, data);
}

static void
radv_init_meta_cache(struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct vk_pipeline_cache *cache;

   VkPipelineCacheCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
   };

   struct vk_pipeline_cache_create_info info = {
      .pCreateInfo = &create_info,
      .disk_cache = pdev->disk_cache_meta,
   };

   cache = vk_pipeline_cache_create(&device->vk, &info, NULL);
   if (cache)
      device->meta_state.cache = vk_pipeline_cache_to_handle(cache);
}

VkResult
radv_device_init_meta(struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   VkResult result;

   memset(&device->meta_state, 0, sizeof(device->meta_state));

   device->meta_state.alloc = (VkAllocationCallbacks){
      .pUserData = device,
      .pfnAllocation = meta_alloc,
      .pfnReallocation = meta_realloc,
      .pfnFree = meta_free,
   };

   radv_init_meta_cache(device);

   result = vk_meta_device_init(&device->vk, &device->meta_state.device);
   if (result != VK_SUCCESS)
      return result;

   device->meta_state.device.pipeline_cache = device->meta_state.cache;

   mtx_init(&device->meta_state.mtx, mtx_plain);

   if (pdev->emulate_etc2) {
      device->meta_state.etc_decode.allocator = &device->meta_state.alloc;
      device->meta_state.etc_decode.nir_options = &pdev->nir_options[MESA_SHADER_COMPUTE];
      device->meta_state.etc_decode.pipeline_cache = device->meta_state.cache;

      vk_texcompress_etc2_init(&device->vk, &device->meta_state.etc_decode);
   }

   if (pdev->emulate_astc) {
      result = vk_texcompress_astc_init(&device->vk, &device->meta_state.alloc, device->meta_state.cache,
                                        &device->meta_state.astc_decode);
      if (result != VK_SUCCESS)
         return result;
   }

   if (device->vk.enabled_features.nullDescriptor) {
      result = radv_device_init_null_accel_struct(device);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

void
radv_device_finish_meta(struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (pdev->emulate_etc2)
      vk_texcompress_etc2_finish(&device->vk, &device->meta_state.etc_decode);

   if (pdev->emulate_astc) {
      if (device->meta_state.astc_decode)
         vk_texcompress_astc_finish(&device->vk, &device->meta_state.alloc, device->meta_state.astc_decode);
   }

   radv_device_finish_accel_struct_build_state(device);

   vk_common_DestroyPipelineCache(radv_device_to_handle(device), device->meta_state.cache, NULL);
   mtx_destroy(&device->meta_state.mtx);

   if (device->meta_state.device.cache)
      vk_meta_device_finish(&device->vk, &device->meta_state.device);
}

nir_builder PRINTFLIKE(3, 4)
radv_meta_init_shader(struct radv_device *dev, gl_shader_stage stage, const char *name, ...)
{
   const struct radv_physical_device *pdev = radv_device_physical(dev);
   nir_builder b = nir_builder_init_simple_shader(stage, NULL, NULL);
   if (name) {
      va_list args;
      va_start(args, name);
      b.shader->info.name = ralloc_vasprintf(b.shader, name, args);
      va_end(args);
   }

   b.shader->options = &pdev->nir_options[stage];

   radv_device_associate_nir(dev, b.shader);

   return b;
}

/* vertex shader that generates vertices */
nir_shader *
radv_meta_build_nir_vs_generate_vertices(struct radv_device *dev)
{
   const struct glsl_type *vec4 = glsl_vec4_type();

   nir_variable *v_position;

   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_vs_gen_verts");

   nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);

   v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
   v_position->data.location = VARYING_SLOT_POS;

   nir_store_var(&b, v_position, outvec, 0xf);

   return b.shader;
}

nir_shader *
radv_meta_build_nir_fs_noop(struct radv_device *dev)
{
   return radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_noop_fs").shader;
}

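/* Emits the core of a color resolve shader: it averages all samples of the
 * input image. Integer formats and single-sampled sources simply use sample 0.
 * When FMASK is available, a fast path stores sample 0 directly if all samples
 * of the fragment are identical.
 */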
void
radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
                                    nir_variable *input_img, nir_variable *color, nir_def *img_coord)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img);
   nir_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));

   if (is_integer || samples <= 1) {
      nir_store_var(b, color, sample0, 0xf);
      return;
   }

   if (pdev->use_fmask) {
      nir_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord);
      nir_push_if(b, nir_inot(b, all_same));
   }

   nir_def *accum = sample0;
   for (int i = 1; i < samples; i++) {
      nir_def *sample = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, i));
      accum = nir_fadd(b, accum, sample);
   }

   accum = nir_fdiv_imm(b, accum, samples);
   nir_store_var(b, color, accum, 0xf);

   if (pdev->use_fmask) {
      nir_push_else(b, NULL);
      nir_store_var(b, color, sample0, 0xf);
      nir_pop_if(b, NULL);
   }
}

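/* Builds a vulkan_resource_index intrinsic for (desc_set, binding) and returns
 * its first two components as the descriptor handle.
 */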
nir_def *
radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding)
{
   nir_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set, .binding = binding);
   return nir_trim_vector(b, rsrc, 2);
}

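/* Returns the global invocation ID, i.e. workgroup_id * workgroup_size +
 * local_invocation_id, for the first num_components dimensions.
 */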
nir_def *
get_global_ids(nir_builder *b, unsigned num_components)
{
   unsigned mask = BITFIELD_MASK(num_components);

   nir_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
   nir_def *block_ids = nir_channels(b, nir_load_workgroup_id(b), mask);
   nir_def *block_size =
      nir_channels(b,
                   nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],
                                 b->shader->info.workgroup_size[2], 0),
                   mask);

   return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
}

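/* Breaks out of the innermost loop once the counter in 'var' reaches 'count',
 * otherwise increments it. Illustrative usage sketch (helper names assumed
 * from the NIR builder API):
 *
 *    nir_variable *i = nir_local_variable_create(b->impl, glsl_uint_type(), "i");
 *    nir_store_var(b, i, nir_imm_int(b, 0), 0x1);
 *    nir_push_loop(b);
 *    radv_break_on_count(b, i, count);
 *    ... loop body ...
 *    nir_pop_loop(b, NULL);
 */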
void
radv_break_on_count(nir_builder *b, nir_variable *var, nir_def *count)
{
   nir_def *counter = nir_load_var(b, var);

   nir_break_if(b, nir_uge(b, counter, count));

   counter = nir_iadd_imm(b, counter, 1);
   nir_store_var(b, var, counter, 0x1);
}

VkResult
radv_meta_get_noop_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out)
{
   enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_NOOP;

   return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, NULL, NULL, &key, sizeof(key),
                                      layout_out);
}