/*
 * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_indirect_execution_set.h"

#include "nvk_cmd_buffer.h"
#include "nvk_device.h"
#include "nvk_entrypoints.h"
#include "nvk_shader.h"
#include "vk_pipeline.h"

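/* Returns a CPU pointer to entry `index` in the set's backing memory.
 * Entries are fixed-stride, so each one can be rewritten independently.
 */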
static void *
nvk_ies_map(struct nvk_indirect_execution_set *ies, uint32_t index)
{
   assert(index < ies->count);
   return ies->mem->map + (index * (size_t)ies->stride_B);
}

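/* Builds the hardware QMD (the descriptor the compute engine consumes to
 * launch a dispatch) for a compute shader.  The root descriptor table cbuf
 * doesn't have a stable address, so cbuf 0 is written with a placeholder
 * that gets patched with the real address later.
 */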
void
nvk_ies_cs_qmd_init(struct nvk_physical_device *pdev,
                    struct nvk_ies_cs_qmd *qmd,
                    struct nvk_shader *shader)
{
   struct nak_qmd_info qmd_info = {
      .addr = shader->hdr_addr,
      .smem_size = shader->info.cs.smem_size,
      .smem_max = NVK_MAX_SHARED_SIZE,
   };

   assert(shader->cbuf_map.cbuf_count <= ARRAY_SIZE(qmd_info.cbufs));
   for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) {
      const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c];
      switch (cbuf->type) {
      case NVK_CBUF_TYPE_ROOT_DESC:
         /* This one gets patched with the actual address */
         assert(c == 0);
         qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) {
            .index = c,
            .addr = 0xc0ffee000,
            .size = sizeof(struct nvk_root_descriptor_table),
         };
         break;

      case NVK_CBUF_TYPE_SHADER_DATA:
         qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) {
            .index = c,
            .addr = shader->data_addr,
            .size = shader->data_size,
         };
         break;

      default:
         unreachable("Unsupported cbuf type");
      }
   }

   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info,
                qmd->qmd, sizeof(qmd->qmd));
}

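/* Writes a compute shader into entry `index` as a raw QMD. */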
static void
nvk_ies_set_cs(struct nvk_device *dev,
               struct nvk_indirect_execution_set *ies,
               uint32_t index,
               struct nvk_shader *shader)
{
   struct nvk_ies_cs_qmd qmd = {};
   nvk_ies_cs_qmd_init(nvk_device_physical(dev), &qmd, shader);

   assert(sizeof(qmd) <= ies->stride_B);
   memcpy(nvk_ies_map(ies, index), &qmd, sizeof(qmd));
}

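/* Worst-case push dword count for binding an entire graphics pipeline: the
 * sum over all stages present.  The last vertex/tess/geometry/mesh (VTGM)
 * stage is programmed differently from the other pre-rasterization stages,
 * so we first work out which stage that is.
 */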
uint16_t
nvk_ies_gfx_pipeline_max_dw_count(struct nvk_physical_device *pdev,
                                  VkShaderStageFlags stages)
{
   gl_shader_stage last_vtgm = MESA_SHADER_VERTEX;
   u_foreach_bit(s, stages) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << s);
      if (stage != MESA_SHADER_FRAGMENT)
         last_vtgm = stage;
   }

   uint16_t push_dw = 0;
   u_foreach_bit(s, stages) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << s);
      push_dw += nvk_max_shader_push_dw(pdev, stage, stage == last_vtgm);
   }

   return push_dw;
}

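/* A pipeline entry is a struct nvk_ies_gfx_pipeline header followed by the
 * push dwords for every stage, sized for the worst case.
 */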
static uint32_t
nvk_ies_stride_gfx_pipeline(struct nvk_physical_device *pdev,
                            VkShaderStageFlags stages)
{
   return sizeof(struct nvk_ies_gfx_pipeline) +
          (4 * nvk_ies_gfx_pipeline_max_dw_count(pdev, stages));
}

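/* Writes a whole graphics pipeline into entry `index`.  Shaders are
 * bucketed by NV9097 shader type so their push buffers land in a fixed
 * order, then each stage's pre-baked push dwords are copied back-to-back
 * after the header.
 */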
static void
nvk_ies_set_gfx_pipeline(struct nvk_device *dev,
                         struct nvk_indirect_execution_set *ies,
                         uint32_t index,
                         struct vk_pipeline *pipeline)
{
   gl_shader_stage last_vtgm = MESA_SHADER_VERTEX;
   struct nvk_shader *type_shader[6] = {};
   u_foreach_bit(s, pipeline->stages) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << s);
      struct vk_shader *vk_shader = vk_pipeline_get_shader(pipeline, stage);
      struct nvk_shader *shader =
         container_of(vk_shader, struct nvk_shader, vk);

      assert(shader->info.stage == stage);

      if (stage != MESA_SHADER_FRAGMENT)
         last_vtgm = stage;

      uint32_t type = mesa_to_nv9097_shader_type(stage);
      type_shader[type] = shader;
   }

   void *map = nvk_ies_map(ies, index);

   uint16_t dw_count = 0;
   for (uint32_t i = 0; i < ARRAY_SIZE(type_shader); i++) {
      if (type_shader[i] == NULL)
         continue;

      const uint16_t s_dw_count = type_shader[i]->info.stage == last_vtgm
                                  ? type_shader[i]->vtgm_push_dw_count
                                  : type_shader[i]->push_dw_count;
      memcpy(map + sizeof(struct nvk_ies_gfx_pipeline) + dw_count * 4,
             type_shader[i]->push_dw, s_dw_count * 4);
      dw_count += s_dw_count;
   }

   struct nvk_ies_gfx_pipeline hdr = {
      .dw_count = dw_count,
   };
   memcpy(map, &hdr, sizeof(hdr));
}

uint16_t
nvk_ies_gfx_shader_max_dw_count(struct nvk_physical_device *pdev,
                                VkShaderStageFlags stages,
                                bool last_vtgm)
{
   /* Each entry is a single shader so take the max */
   uint16_t max_push_dw = 0;
   u_foreach_bit(s, stages) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << s);
      uint16_t push_dw = nvk_max_shader_push_dw(pdev, stage, last_vtgm);
      max_push_dw = MAX2(max_push_dw, push_dw);
   }

   return max_push_dw;
}

static uint32_t
nvk_ies_stride_gfx_shader(struct nvk_physical_device *pdev,
                          VkShaderStageFlags stages)
{
   return sizeof(struct nvk_ies_gfx_shader) +
          (4 * nvk_ies_gfx_shader_max_dw_count(pdev, stages, true));
}

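/* Writes a single shader object into entry `index`.  Whether this shader
 * will end up as the last VTGM stage isn't known until execution time, so
 * the header records both dword counts and we copy enough push dwords to
 * cover either variant.
 */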
static void
nvk_ies_set_gfx_shader(struct nvk_device *dev,
                       struct nvk_indirect_execution_set *ies,
                       uint32_t index,
                       struct nvk_shader *shader)
{
   struct nvk_ies_gfx_shader hdr = {
      .dw_count = shader->push_dw_count,
      .vtgm_dw_count = shader->vtgm_push_dw_count,
   };

   void *map = nvk_ies_map(ies, index);
   memcpy(map, &hdr, sizeof(hdr));
   memcpy(map + sizeof(hdr), shader->push_dw,
          4 * MAX2(shader->push_dw_count, shader->vtgm_push_dw_count));
}

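/* Writes a pipeline into entry `index`, dispatching on the type chosen at
 * creation time.  A compute pipeline is reduced to its single compute
 * shader's QMD.
 */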
static void
nvk_ies_set_pipeline(struct nvk_device *dev,
                     struct nvk_indirect_execution_set *ies,
                     uint32_t index, struct vk_pipeline *pipeline)
{
   switch (ies->type) {
   case NVK_IES_TYPE_CS_QMD: {
      struct vk_shader *vk_shader =
         vk_pipeline_get_shader(pipeline, MESA_SHADER_COMPUTE);
      struct nvk_shader *shader =
         container_of(vk_shader, struct nvk_shader, vk);
      nvk_ies_set_cs(dev, ies, index, shader);
      break;
   }

   case NVK_IES_TYPE_GFX_PIPELINE:
      nvk_ies_set_gfx_pipeline(dev, ies, index, pipeline);
      break;

   default:
      unreachable("Invalid indirect execution set type");
   }
}

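/* Writes a shader object into entry `index`, dispatching on the set's
 * type.
 */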
static void
nvk_ies_set_shader(struct nvk_device *dev,
                   struct nvk_indirect_execution_set *ies,
                   uint32_t index, struct nvk_shader *shader)
{
   switch (ies->type) {
   case NVK_IES_TYPE_CS_QMD:
      nvk_ies_set_cs(dev, ies, index, shader);
      break;

   case NVK_IES_TYPE_GFX_SHADER:
      nvk_ies_set_gfx_shader(dev, ies, index, shader);
      break;

   default:
      unreachable("Invalid indirect execution set type");
   }
}

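/* An indirect execution set is a single GPU buffer holding a flat,
 * fixed-stride array of entries.  The type and stride are chosen up front
 * from the initial pipeline or shaders:
 *
 *    CS_QMD:       one hardware QMD per entry
 *    GFX_PIPELINE: nvk_ies_gfx_pipeline header + push dwords for every
 *                  stage in the pipeline
 *    GFX_SHADER:   nvk_ies_gfx_shader header + push dwords for one shader
 *
 * Compute and graphics never mix within one set, which the asserts below
 * rely on.
 */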
VKAPI_ATTR VkResult VKAPI_CALL
nvk_CreateIndirectExecutionSetEXT(VkDevice _device,
                                  const VkIndirectExecutionSetCreateInfoEXT *pCreateInfo,
                                  const VkAllocationCallbacks *pAllocator,
                                  VkIndirectExecutionSetEXT *pIndirectExecutionSet)
{
   VK_FROM_HANDLE(nvk_device, dev, _device);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   struct nvk_indirect_execution_set *ies =
      vk_object_zalloc(&dev->vk, pAllocator, sizeof(*ies),
                       VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT);
   if (ies == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   switch (pCreateInfo->type) {
   case VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT: {
      VK_FROM_HANDLE(vk_pipeline, pipeline,
                     pCreateInfo->info.pPipelineInfo->initialPipeline);

      ies->count = pCreateInfo->info.pPipelineInfo->maxPipelineCount;
      if (pipeline->stages & VK_SHADER_STAGE_COMPUTE_BIT) {
         assert(pipeline->stages == VK_SHADER_STAGE_COMPUTE_BIT);
         ies->type = NVK_IES_TYPE_CS_QMD;
         ies->stride_B = sizeof(struct nvk_ies_cs_qmd);
      } else if (pipeline->stages & NVK_SHADER_STAGE_GRAPHICS_BITS) {
         assert(!(pipeline->stages & ~NVK_SHADER_STAGE_GRAPHICS_BITS));
         ies->type = NVK_IES_TYPE_GFX_PIPELINE;
         ies->stride_B = nvk_ies_stride_gfx_pipeline(pdev, pipeline->stages);
      } else {
         unreachable("Unknown shader stage");
      }
      break;
   }

   case VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT: {
      const VkIndirectExecutionSetShaderInfoEXT *info =
         pCreateInfo->info.pShaderInfo;

      VkShaderStageFlags stages = 0;
      for (uint32_t i = 0; i < info->shaderCount; i++) {
         VK_FROM_HANDLE(nvk_shader, shader, info->pInitialShaders[i]);
         stages |= mesa_to_vk_shader_stage(shader->vk.stage);
      }

      ies->count = info->maxShaderCount;
      if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
         assert(stages == VK_SHADER_STAGE_COMPUTE_BIT);
         ies->type = NVK_IES_TYPE_CS_QMD;
         ies->stride_B = sizeof(struct nvk_ies_cs_qmd);
      } else if (stages & NVK_SHADER_STAGE_GRAPHICS_BITS) {
         assert(!(stages & ~NVK_SHADER_STAGE_GRAPHICS_BITS));
         ies->type = NVK_IES_TYPE_GFX_SHADER;
         ies->stride_B = nvk_ies_stride_gfx_shader(pdev, stages);
      } else {
         unreachable("Unknown shader stage");
      }
      break;
   }

   default:
      unreachable("Unknown indirect execution set info type");
   }

   size_t size = ies->count * (size_t)ies->stride_B;
   result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
                                       size, 0, NVKMD_MEM_LOCAL,
                                       NVKMD_MEM_MAP_WR, &ies->mem);
   if (result != VK_SUCCESS) {
      vk_object_free(&dev->vk, pAllocator, ies);
      return result;
   }

   switch (pCreateInfo->type) {
   case VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT: {
      VK_FROM_HANDLE(vk_pipeline, pipeline,
                     pCreateInfo->info.pPipelineInfo->initialPipeline);
      nvk_ies_set_pipeline(dev, ies, 0, pipeline);
      break;
   }

   case VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT: {
      const VkIndirectExecutionSetShaderInfoEXT *info =
         pCreateInfo->info.pShaderInfo;

      for (uint32_t i = 0; i < info->shaderCount; i++) {
         VK_FROM_HANDLE(nvk_shader, shader, info->pInitialShaders[i]);
         nvk_ies_set_shader(dev, ies, i, shader);
      }
      break;
   }

   default:
      unreachable("Unknown indirect execution set info type");
   }

   *pIndirectExecutionSet = nvk_indirect_execution_set_to_handle(ies);

   return VK_SUCCESS;
}

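/* Releases the set's backing GPU memory along with the object itself. */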
VKAPI_ATTR void VKAPI_CALL
nvk_DestroyIndirectExecutionSetEXT(VkDevice _device,
                                   VkIndirectExecutionSetEXT indirectExecutionSet,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(nvk_device, dev, _device);
   VK_FROM_HANDLE(nvk_indirect_execution_set, ies, indirectExecutionSet);

   if (ies == NULL)
      return;

   nvkmd_mem_unref(ies->mem);

   vk_object_free(&dev->vk, pAllocator, ies);
}

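/* Entry updates are plain host writes to the mapped buffer, relying on the
 * API's requirement that entries being overwritten are not concurrently in
 * use by the device.
 */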
VKAPI_ATTR void VKAPI_CALL
nvk_UpdateIndirectExecutionSetPipelineEXT(
   VkDevice _device,
   VkIndirectExecutionSetEXT indirectExecutionSet,
   uint32_t executionSetWriteCount,
   const VkWriteIndirectExecutionSetPipelineEXT *pExecutionSetWrites)
{
   VK_FROM_HANDLE(nvk_device, dev, _device);
   VK_FROM_HANDLE(nvk_indirect_execution_set, ies, indirectExecutionSet);

   for (uint32_t i = 0; i < executionSetWriteCount; i++) {
      VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutionSetWrites[i].pipeline);
      nvk_ies_set_pipeline(dev, ies, pExecutionSetWrites[i].index, pipeline);
   }
}

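/* Same as the pipeline path above, but each write carries a single shader
 * object.
 */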
VKAPI_ATTR void VKAPI_CALL
nvk_UpdateIndirectExecutionSetShaderEXT(
   VkDevice _device,
   VkIndirectExecutionSetEXT indirectExecutionSet,
   uint32_t executionSetWriteCount,
   const VkWriteIndirectExecutionSetShaderEXT *pExecutionSetWrites)
{
   VK_FROM_HANDLE(nvk_device, dev, _device);
   VK_FROM_HANDLE(nvk_indirect_execution_set, ies, indirectExecutionSet);

   for (uint32_t i = 0; i < executionSetWriteCount; i++) {
      VK_FROM_HANDLE(nvk_shader, shader, pExecutionSetWrites[i].shader);
      nvk_ies_set_shader(dev, ies, pExecutionSetWrites[i].index, shader);
   }
}