#include "radv_meta.h"
#include "nir/nir_builder.h"

#include "sid.h"
#include "radv_cs.h"

/*
 * Size threshold (in bytes) at which we switch from the CP (command
 * processor) to a compute shader for certain buffer operations.
 */
#define RADV_BUFFER_OPS_CS_THRESHOLD 4096

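/*
 * Build a compute shader that fills a storage buffer with a 32-bit value.
 * Each invocation broadcasts the push-constant value to a vec4 and stores
 * 16 bytes, so one 64-thread workgroup covers 1024 bytes of the buffer.
 */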
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(load, 0);
	nir_intrinsic_set_range(load, 4);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}

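/*
 * Build a compute shader that copies between two storage buffers.
 * Each invocation loads a vec4 from binding 1 (source) and stores it to
 * binding 0 (destination) at the same 16-byte offset.
 */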
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(src_buf, 0);
	nir_intrinsic_set_binding(src_buf, 1);
	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &src_buf->instr);

	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
	load->src[1] = nir_src_for_ssa(offset);
	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
	load->num_components = 4;
	nir_builder_instr_insert(&b, &load->instr);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}

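/*
 * Create the descriptor set layouts, pipeline layouts and compute pipelines
 * for the buffer fill and copy meta operations. The fill layout carries a
 * single 4-byte push constant for the fill value; both layouts use push
 * descriptors so no descriptor pool is needed at record time.
 */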
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&fill_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&copy_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &fill_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &copy_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &fill_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &copy_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}

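/* Destroy the pipelines and layouts created by radv_device_init_meta_buffer_state(). */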
void radv_device_finish_meta_buffer_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->buffer.copy_pipeline, &state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->buffer.fill_pipeline, &state->alloc);
	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->buffer.copy_p_layout, &state->alloc);
	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->buffer.fill_p_layout, &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->buffer.copy_ds_layout,
					&state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->buffer.fill_ds_layout,
					&state->alloc);
}

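/*
 * Fill a buffer using the compute path: save the meta-affected state, bind
 * the fill pipeline, push the destination buffer descriptor and the fill
 * value, then dispatch one workgroup per 1024 bytes.
 */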
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *bo,
			       uint64_t offset, uint64_t size, uint32_t value)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_state saved_state;

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_CONSTANTS |
		       RADV_META_SAVE_DESCRIPTORS);

	struct radv_buffer dst_buffer = {
		.bo = bo,
		.offset = offset,
		.size = size
	};

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.fill_pipeline);

	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.buffer.fill_p_layout,
				      0, /* set */
				      1, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&dst_buffer),
							      .offset = 0,
							      .range = size
						      }
					      }
				      });

	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.buffer.fill_p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
			      &value);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_meta_restore(&saved_state, cmd_buffer);
}

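/*
 * Copy between buffers using the compute path; mirrors fill_buffer_shader()
 * but pushes both destination (binding 0) and source (binding 1) descriptors
 * and needs no push constants.
 */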
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *src_bo,
			       struct radeon_winsys_bo *dst_bo,
			       uint64_t src_offset, uint64_t dst_offset,
			       uint64_t size)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_state saved_state;

	radv_meta_save(&saved_state, cmd_buffer,
		       RADV_META_SAVE_COMPUTE_PIPELINE |
		       RADV_META_SAVE_DESCRIPTORS);

	struct radv_buffer dst_buffer = {
		.bo = dst_bo,
		.offset = dst_offset,
		.size = size
	};

	struct radv_buffer src_buffer = {
		.bo = src_bo,
		.offset = src_offset,
		.size = size
	};

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.copy_pipeline);

	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.buffer.copy_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&dst_buffer),
							      .offset = 0,
							      .range = size
						      }
					      },
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 1,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						      .pBufferInfo = &(VkDescriptorBufferInfo) {
							      .buffer = radv_buffer_to_handle(&src_buffer),
							      .offset = 0,
							      .range = size
						      }
					      }
				      });

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_meta_restore(&saved_state, cmd_buffer);
}

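/*
 * Fill a buffer, choosing between the compute shader path (large fills) and
 * CP DMA (small fills). Offset and size must be 4-byte aligned. Returns the
 * cache-flush bits the caller must apply after the compute path; the CP DMA
 * path returns 0.
 */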
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
			  struct radeon_winsys_bo *bo,
			  uint64_t offset, uint64_t size, uint32_t value)
{
	uint32_t flush_bits = 0;

	assert(!(offset & 3));
	assert(!(size & 3));

	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
		flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
			     RADV_CMD_FLAG_INV_VMEM_L1 |
			     RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
	} else if (size) {
		uint64_t va = radv_buffer_get_va(bo);
		va += offset;
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo, 8);
		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
	}

	return flush_bits;
}

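/*
 * Copy between buffers, using the compute shader for large, dword-aligned
 * copies and CP DMA otherwise.
 */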
static
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
		      struct radeon_winsys_bo *src_bo,
		      struct radeon_winsys_bo *dst_bo,
		      uint64_t src_offset, uint64_t dst_offset,
		      uint64_t size)
{
	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
				   src_offset, dst_offset, size);
	else if (size) {
		uint64_t src_va = radv_buffer_get_va(src_bo);
		uint64_t dst_va = radv_buffer_get_va(dst_bo);
		src_va += src_offset;
		dst_va += dst_offset;

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo, 8);
		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo, 8);

		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
	}
}

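/* vkCmdFillBuffer: clamp VK_WHOLE_SIZE to a dword multiple and forward to radv_fill_buffer(). */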
void radv_CmdFillBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                fillSize,
	uint32_t                                    data)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);

	if (fillSize == VK_WHOLE_SIZE)
		fillSize = (dst_buffer->size - dstOffset) & ~3ull;

	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
			 fillSize, data);
}

void radv_CmdCopyBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    srcBuffer,
	VkBuffer                                    destBuffer,
	uint32_t                                    regionCount,
	const VkBufferCopy*                         pRegions)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);

	for (unsigned r = 0; r < regionCount; r++) {
		uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
		uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
		uint64_t copy_size = pRegions[r].size;

		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
				 src_offset, dest_offset, copy_size);
	}
}

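/*
 * vkCmdUpdateBuffer: small updates are written inline with a WRITE_DATA
 * packet (after flushing caches); larger ones are staged through the command
 * buffer's upload BO and copied with radv_copy_buffer().
 */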
void radv_CmdUpdateBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                dataSize,
	const void*                                 pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = radv_buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (!dataSize)
		return;

	if (dataSize < RADV_BUFFER_OPS_CS_THRESHOLD) {
		si_emit_cache_flush(cmd_buffer);

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo, 8);

		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
					    V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
			    S_370_WR_CONFIRM(1) |
			    S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);

		if (unlikely(cmd_buffer->device->trace_bo))
			radv_cmd_buffer_trace_emit(cmd_buffer);
	} else {
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}