#include "radv_meta.h"
#include "nir/nir_builder.h"

#include "sid.h"
#include "radv_cs.h"

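/* Builds the compute shader used by the buffer-fill meta path: each
 * invocation splats the 32-bit fill value from the push constants into a
 * vec4 and stores it to the destination SSBO (set 0, binding 0) at a
 * 16-byte offset derived from its global invocation ID.
 */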
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info->cs.local_size[0] = 64;
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	/* global_id = wg_id * block_size + invoc_id */
	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Each invocation writes one 16-byte vec4; only the scalar x
	 * component of the offset vector is needed. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	/* Broadcast the scalar fill value to all four components. */
	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) {0, 0, 0, 0}, 4, false);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}

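/* Builds the compute shader used by the buffer-copy meta path: each
 * invocation loads one vec4 (16 bytes) from the source SSBO (set 0,
 * binding 1) and stores it at the same offset in the destination SSBO
 * (set 0, binding 0).
 */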
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
	b.shader->info->cs.local_size[0] = 64;
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(src_buf, 0);
	nir_intrinsic_set_binding(src_buf, 1);
	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &src_buf->instr);

	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
	load->src[1] = nir_src_for_ssa(offset);
	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
	load->num_components = 4;
	nir_builder_instr_insert(&b, &load->instr);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}

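/* Creates the descriptor set layouts, pipeline layouts and compute
 * pipelines for the buffer fill and copy meta operations. On failure,
 * everything created so far is torn down again via
 * radv_device_finish_meta_buffer_state().
 */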
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	zero(device->meta_state.buffer);

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&fill_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&copy_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &fill_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &copy_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &fill_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &copy_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}

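/* Destroys the objects created by radv_device_init_meta_buffer_state().
 * Safe to call on partially initialized state: each handle is checked
 * before it is destroyed.
 */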
void radv_device_finish_meta_buffer_state(struct radv_device *device)
{
	if (device->meta_state.buffer.copy_pipeline)
		radv_DestroyPipeline(radv_device_to_handle(device),
				     device->meta_state.buffer.copy_pipeline,
				     &device->meta_state.alloc);

	if (device->meta_state.buffer.fill_pipeline)
		radv_DestroyPipeline(radv_device_to_handle(device),
				     device->meta_state.buffer.fill_pipeline,
				     &device->meta_state.alloc);

	if (device->meta_state.buffer.copy_p_layout)
		radv_DestroyPipelineLayout(radv_device_to_handle(device),
					   device->meta_state.buffer.copy_p_layout,
					   &device->meta_state.alloc);

	if (device->meta_state.buffer.fill_p_layout)
		radv_DestroyPipelineLayout(radv_device_to_handle(device),
					   device->meta_state.buffer.fill_p_layout,
					   &device->meta_state.alloc);

	if (device->meta_state.buffer.copy_ds_layout)
		radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
						device->meta_state.buffer.copy_ds_layout,
						&device->meta_state.alloc);

	if (device->meta_state.buffer.fill_ds_layout)
		radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
						device->meta_state.buffer.fill_ds_layout,
						&device->meta_state.alloc);
}

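/* Fills the buffer using the compute pipeline. Each 64-lane workgroup
 * writes 64 * 16 = 1024 bytes, so the dispatch width is the size rounded
 * up to 1024-byte blocks. Compute state is saved and restored around the
 * dispatch.
 */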
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *bo,
			       uint64_t offset, uint64_t size, uint32_t value)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_compute_state saved_state;
	VkDescriptorSet ds;

	radv_meta_save_compute(&saved_state, cmd_buffer, 4);

	radv_temp_descriptor_set_create(device, cmd_buffer,
					device->meta_state.buffer.fill_ds_layout,
					&ds);

	struct radv_buffer dst_buffer = {
		.bo = bo,
		.offset = offset,
		.size = size
	};

	radv_UpdateDescriptorSets(radv_device_to_handle(device),
				  1, /* writeCount */
				  (VkWriteDescriptorSet[]) {
					  {
						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						  .dstSet = ds,
						  .dstBinding = 0,
						  .dstArrayElement = 0,
						  .descriptorCount = 1,
						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						  .pBufferInfo = &(VkDescriptorBufferInfo) {
							.buffer = radv_buffer_to_handle(&dst_buffer),
							.offset = 0,
							.range = size
						  }
					  }
				  }, 0, NULL);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.fill_pipeline);

	radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
				   VK_PIPELINE_BIND_POINT_COMPUTE,
				   device->meta_state.buffer.fill_p_layout, 0, 1,
				   &ds, 0, NULL);

	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.buffer.fill_p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
			      &value);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_temp_descriptor_set_destroy(device, ds);

	radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
}

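/* Copies between the buffers using the compute pipeline; as in the fill
 * path, each 64-lane workgroup moves 1024 bytes.
 */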
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *src_bo,
			       struct radeon_winsys_bo *dst_bo,
			       uint64_t src_offset, uint64_t dst_offset,
			       uint64_t size)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_compute_state saved_state;
	VkDescriptorSet ds;

	radv_meta_save_compute(&saved_state, cmd_buffer, 0);

	radv_temp_descriptor_set_create(device, cmd_buffer,
					device->meta_state.buffer.copy_ds_layout,
					&ds);

	struct radv_buffer dst_buffer = {
		.bo = dst_bo,
		.offset = dst_offset,
		.size = size
	};

	struct radv_buffer src_buffer = {
		.bo = src_bo,
		.offset = src_offset,
		.size = size
	};

	radv_UpdateDescriptorSets(radv_device_to_handle(device),
				  2, /* writeCount */
				  (VkWriteDescriptorSet[]) {
					  {
						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						  .dstSet = ds,
						  .dstBinding = 0,
						  .dstArrayElement = 0,
						  .descriptorCount = 1,
						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						  .pBufferInfo = &(VkDescriptorBufferInfo) {
							.buffer = radv_buffer_to_handle(&dst_buffer),
							.offset = 0,
							.range = size
						  }
					  },
					  {
						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						  .dstSet = ds,
						  .dstBinding = 1,
						  .dstArrayElement = 0,
						  .descriptorCount = 1,
						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						  .pBufferInfo = &(VkDescriptorBufferInfo) {
							.buffer = radv_buffer_to_handle(&src_buffer),
							.offset = 0,
							.range = size
						  }
					  }
				  }, 0, NULL);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.copy_pipeline);

	radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
				   VK_PIPELINE_BIND_POINT_COMPUTE,
				   device->meta_state.buffer.copy_p_layout, 0, 1,
				   &ds, 0, NULL);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_temp_descriptor_set_destroy(device, ds);

	radv_meta_restore_compute(&saved_state, cmd_buffer, 0);
}

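/* Fills a buffer range with a 32-bit value. The offset and size must be
 * 4-byte aligned. Fills of at least 4096 bytes go through the compute
 * shader; smaller non-empty ranges use a CP DMA clear instead.
 */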
void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
		      struct radeon_winsys_bo *bo,
		      uint64_t offset, uint64_t size, uint32_t value)
{
	assert(!(offset & 3));
	assert(!(size & 3));

	if (size >= 4096)
		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
	else if (size) {
		uint64_t va = cmd_buffer->device->ws->buffer_get_va(bo);
		va += offset;
		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, bo, 8);
		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
	}
}

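/* Copies between buffer ranges. The compute path requires the size and
 * both offsets to be 4-byte aligned and the size to be at least 4096
 * bytes; anything else falls back to a CP DMA copy.
 */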
static void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
			     struct radeon_winsys_bo *src_bo,
			     struct radeon_winsys_bo *dst_bo,
			     uint64_t src_offset, uint64_t dst_offset,
			     uint64_t size)
{
	if (size >= 4096 && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
				   src_offset, dst_offset, size);
	else if (size) {
		uint64_t src_va = cmd_buffer->device->ws->buffer_get_va(src_bo);
		uint64_t dst_va = cmd_buffer->device->ws->buffer_get_va(dst_bo);
		src_va += src_offset;
		dst_va += dst_offset;

		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, src_bo, 8);
		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_bo, 8);

		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
	}
}

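/* vkCmdFillBuffer entry point. A fillSize of VK_WHOLE_SIZE is clamped to
 * the largest multiple of four bytes that fits in the remainder of the
 * buffer, as the spec requires.
 */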
void radv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);

	if (fillSize == VK_WHOLE_SIZE)
		fillSize = (dst_buffer->size - dstOffset) & ~3ull;

	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
			 fillSize, data);
}

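/* vkCmdCopyBuffer entry point: each region is handed to radv_copy_buffer()
 * independently; the spec forbids the regions from overlapping, so their
 * order does not matter.
 */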
void radv_CmdCopyBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    srcBuffer,
	VkBuffer                                    destBuffer,
	uint32_t                                    regionCount,
	const VkBufferCopy*                         pRegions)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);

	for (unsigned r = 0; r < regionCount; r++) {
		uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
		uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
		uint64_t copy_size = pRegions[r].size;

		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
				 src_offset, dest_offset, copy_size);
	}
}

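/* vkCmdUpdateBuffer entry point: small updates (< 4096 bytes) are written
 * inline into the command stream with a WRITE_DATA packet; larger ones are
 * staged in the upload buffer and copied with radv_copy_buffer().
 */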
void radv_CmdUpdateBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                dataSize,
	const void*                                 pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (dataSize < 4096) {
		si_emit_cache_flush(cmd_buffer);

		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);

		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		/* Emit the update inline: WRITE_DATA header, control word,
		 * 64-bit destination address, then the payload itself. */
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
		                                V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
		                            S_370_WR_CONFIRM(1) |
		                            S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);
	} else {
		/* Stage the data in the upload BO, then copy it into place. */
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}