#include "radv_meta.h"
#include "nir/nir_builder.h"

#include "sid.h"
#include "radv_cs.h"

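/* Builds a compute shader that fills a storage buffer with a 32-bit value
 * taken from a push constant. Each invocation stores one 16-byte vec4, so
 * a 64-wide workgroup covers 1024 bytes per dispatch group.
 */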
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

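	/* Flatten the dispatch into a linear index:
	 * global_id = workgroup_id * workgroup_size + local_invocation_id.
	 * Each invocation then writes one 16-byte slot at global_id * 16.
	 */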
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_channel(&b, offset, 0);

	nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);

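	/* Load the 32-bit fill value from push constant offset 0. */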
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(load, 0);
	nir_intrinsic_set_range(load, 4);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

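	/* Broadcast the scalar fill value to all four components of a vec4
	 * so each invocation can store 16 bytes at once.
	 */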
	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0 }, 4);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(dst_buf);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
	nir_intrinsic_set_align(store, 16, 0);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}

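/* Builds a compute shader that copies 16 bytes per invocation from the
 * source SSBO (set 0, binding 1) to the destination SSBO (set 0,
 * binding 0). With a 64-wide workgroup, one workgroup moves 1024 bytes.
 */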
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_channel(&b, offset, 0);

	nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
	nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);

	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
	load->src[0] = nir_src_for_ssa(src_buf);
	load->src[1] = nir_src_for_ssa(offset);
	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
	load->num_components = 4;
	nir_intrinsic_set_align(load, 16, 0);
	nir_builder_instr_insert(&b, &load->instr);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
	store->src[1] = nir_src_for_ssa(dst_buf);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
	nir_intrinsic_set_align(store, 16, 0);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}
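/* Creates the descriptor set layouts, pipeline layouts and compute
 * pipelines for the buffer fill and copy meta operations.
 */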
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&fill_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&copy_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

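	/* The fill pipeline takes its fill value in a single 4-byte push
	 * constant range; the copy pipeline needs no push constants.
	 */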
	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &fill_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &copy_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &fill_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &copy_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}

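/* Destroys everything created by radv_device_init_meta_buffer_state().
 * The destroy entry points ignore VK_NULL_HANDLE, so this also serves as
 * the cleanup path for a partially-initialized state.
 */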
void radv_device_finish_meta_buffer_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->buffer.copy_pipeline, &state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->buffer.fill_pipeline, &state->alloc);
	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->buffer.copy_p_layout, &state->alloc);
	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->buffer.fill_p_layout, &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->buffer.copy_ds_layout,
					&state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->buffer.fill_ds_layout,
					&state->alloc);
}

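/* Fills a buffer with the compute pipeline: the caller's compute state is
 * saved and restored around the dispatch, and one workgroup is launched
 * per 1024 bytes (64 invocations x 16 bytes each).
 */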
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *bo,
			       uint64_t offset, uint64_t size, uint32_t value)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_state saved_state;

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_CONSTANTS |
		       RADV_META_SAVE_DESCRIPTORS);

	struct radv_buffer dst_buffer = {
		.bo = bo,
		.offset = offset,
		.size = size
	};

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.fill_pipeline);

	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.buffer.fill_p_layout,
				      0, /* set */
				      1, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&dst_buffer),
							      .offset = 0,
							      .range = size
						      }
					      }
				      });

	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.buffer.fill_p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
			      &value);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_meta_restore(&saved_state, cmd_buffer);
}

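/* Copies between buffers with the compute pipeline; like the fill path,
 * it dispatches one workgroup per 1024 bytes.
 */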
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *src_bo,
			       struct radeon_winsys_bo *dst_bo,
			       uint64_t src_offset, uint64_t dst_offset,
			       uint64_t size)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_state saved_state;

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_DESCRIPTORS);

	struct radv_buffer dst_buffer = {
		.bo = dst_bo,
		.offset = dst_offset,
		.size = size
	};

	struct radv_buffer src_buffer = {
		.bo = src_bo,
		.offset = src_offset,
		.size = size
	};

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.copy_pipeline);

	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.buffer.copy_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&dst_buffer),
							      .offset = 0,
							      .range = size
						      }
					      },
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 1,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&src_buffer),
							      .offset = 0,
							      .range = size
						      }
					      }
				      });

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_meta_restore(&saved_state, cmd_buffer);
}
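/* Fills a 4-byte-aligned buffer range: large fills use the compute shader
 * path, small ones CP DMA. Returns the flush bits the caller must emit
 * before the written data can be consumed.
 */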
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
			  struct radeon_winsys_bo *bo,
			  uint64_t offset, uint64_t size, uint32_t value)
{
	uint32_t flush_bits = 0;

	assert(!(offset & 3));
	assert(!(size & 3));

	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
		flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
			     RADV_CMD_FLAG_INV_VCACHE |
			     RADV_CMD_FLAG_WB_L2;
	} else if (size) {
		uint64_t va = radv_buffer_get_va(bo);
		va += offset;
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
	}

	return flush_bits;
}

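/* Copies between buffers: the compute path is used only for large,
 * 4-byte-aligned copies; everything else goes through CP DMA.
 */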
static void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
			     struct radeon_winsys_bo *src_bo,
			     struct radeon_winsys_bo *dst_bo,
			     uint64_t src_offset, uint64_t dst_offset,
			     uint64_t size)
{
	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
				   src_offset, dst_offset, size);
	else if (size) {
		uint64_t src_va = radv_buffer_get_va(src_bo);
		uint64_t dst_va = radv_buffer_get_va(dst_bo);
		src_va += src_offset;
		dst_va += dst_offset;

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);

		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
	}
}

void radv_CmdFillBuffer(
	VkCommandBuffer commandBuffer,
	VkBuffer dstBuffer,
	VkDeviceSize dstOffset,
	VkDeviceSize fillSize,
	uint32_t data)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);

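	/* Per the spec, VK_WHOLE_SIZE fills the largest multiple of four
	 * bytes that fits in the remaining range.
	 */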
	if (fillSize == VK_WHOLE_SIZE)
		fillSize = (dst_buffer->size - dstOffset) & ~3ull;

	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
			 fillSize, data);
}

static void
copy_buffer(struct radv_cmd_buffer *cmd_buffer,
	    struct radv_buffer *src_buffer,
	    struct radv_buffer *dst_buffer,
	    const VkBufferCopy2KHR *region)
{
	bool old_predicating;

	/* VK_EXT_conditional_rendering says that copy commands should not be
	 * affected by conditional rendering.
	 */
	old_predicating = cmd_buffer->state.predicating;
	cmd_buffer->state.predicating = false;

	radv_copy_buffer(cmd_buffer,
			 src_buffer->bo,
			 dst_buffer->bo,
			 src_buffer->offset + region->srcOffset,
			 dst_buffer->offset + region->dstOffset,
			 region->size);

	/* Restore conditional rendering. */
	cmd_buffer->state.predicating = old_predicating;
}

void radv_CmdCopyBuffer(
	VkCommandBuffer commandBuffer,
	VkBuffer srcBuffer,
	VkBuffer destBuffer,
	uint32_t regionCount,
	const VkBufferCopy* pRegions)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, destBuffer);

	for (unsigned r = 0; r < regionCount; r++) {
		VkBufferCopy2KHR copy = {
			.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
			.srcOffset = pRegions[r].srcOffset,
			.dstOffset = pRegions[r].dstOffset,
			.size = pRegions[r].size,
		};

		copy_buffer(cmd_buffer, src_buffer, dst_buffer, &copy);
	}
}

void radv_CmdCopyBuffer2KHR(
	VkCommandBuffer commandBuffer,
	const VkCopyBufferInfo2KHR* pCopyBufferInfo)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

	for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
		copy_buffer(cmd_buffer, src_buffer, dst_buffer,
			    &pCopyBufferInfo->pRegions[r]);
	}
}

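/* Small updates are written inline into the command stream with a
 * PKT3_WRITE_DATA packet; larger ones are staged in the upload buffer and
 * copied with radv_copy_buffer().
 */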
void radv_CmdUpdateBuffer(
	VkCommandBuffer commandBuffer,
	VkBuffer dstBuffer,
	VkDeviceSize dstOffset,
	VkDeviceSize dataSize,
	const void* pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = radv_buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (!dataSize)
		return;

	if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
		si_emit_cache_flush(cmd_buffer);

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);

		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
							  V_370_MEM : V_370_MEM_GRBM) |
					    S_370_WR_CONFIRM(1) |
					    S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);

		if (unlikely(cmd_buffer->device->trace_bo))
			radv_cmd_buffer_trace_emit(cmd_buffer);
	} else {
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}