1 /*
2 * Copyright © 2022 Konstantin Seurer
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "radv_radix_sort.h"
25 #include "targets/u64/config.h"
26 #include "radv_private.h"
27 #include "target.h"
28
29 static const uint32_t init_spv[] = {
30 #include "radix_sort/shaders/init.comp.spv.h"
31 };
32
33 static const uint32_t fill_spv[] = {
34 #include "radix_sort/shaders/fill.comp.spv.h"
35 };
36
37 static const uint32_t histogram_spv[] = {
38 #include "radix_sort/shaders/histogram.comp.spv.h"
39 };
40
41 static const uint32_t prefix_spv[] = {
42 #include "radix_sort/shaders/prefix.comp.spv.h"
43 };
44
45 static const uint32_t scatter_0_even_spv[] = {
46 #include "radix_sort/shaders/scatter_0_even.comp.spv.h"
47 };
48
49 static const uint32_t scatter_0_odd_spv[] = {
50 #include "radix_sort/shaders/scatter_0_odd.comp.spv.h"
51 };
52
53 static const uint32_t scatter_1_even_spv[] = {
54 #include "radix_sort/shaders/scatter_1_even.comp.spv.h"
55 };
56
57 static const uint32_t scatter_1_odd_spv[] = {
58 #include "radix_sort/shaders/scatter_1_odd.comp.spv.h"
59 };
60
61 static const struct radix_sort_vk_target_config target_config = {
62 .keyval_dwords = RS_KEYVAL_DWORDS,
63
64 .histogram =
65 {
66 .workgroup_size_log2 = RS_HISTOGRAM_WORKGROUP_SIZE_LOG2,
67 .subgroup_size_log2 = RS_HISTOGRAM_SUBGROUP_SIZE_LOG2,
68 .block_rows = RS_HISTOGRAM_BLOCK_ROWS,
69 },
70
71 .prefix =
72 {
73 .workgroup_size_log2 = RS_PREFIX_WORKGROUP_SIZE_LOG2,
74 .subgroup_size_log2 = RS_PREFIX_SUBGROUP_SIZE_LOG2,
75 },
76
77 .scatter =
78 {
79 .workgroup_size_log2 = RS_SCATTER_WORKGROUP_SIZE_LOG2,
80 .subgroup_size_log2 = RS_SCATTER_SUBGROUP_SIZE_LOG2,
81 .block_rows = RS_SCATTER_BLOCK_ROWS,
82 },
83 };
84
85 radix_sort_vk_t *
radv_create_radix_sort_u64(VkDevice device,VkAllocationCallbacks const * ac,VkPipelineCache pc)86 radv_create_radix_sort_u64(VkDevice device, VkAllocationCallbacks const *ac, VkPipelineCache pc)
87 {
88 const uint32_t *spv[8] = {
89 init_spv, fill_spv, histogram_spv, prefix_spv,
90 scatter_0_even_spv, scatter_0_odd_spv, scatter_1_even_spv, scatter_1_odd_spv,
91 };
92 const uint32_t spv_sizes[8] = {
93 sizeof(init_spv), sizeof(fill_spv), sizeof(histogram_spv), sizeof(prefix_spv),
94 sizeof(scatter_0_even_spv), sizeof(scatter_0_odd_spv), sizeof(scatter_1_even_spv), sizeof(scatter_1_odd_spv),
95 };
96 return radix_sort_vk_create(device, ac, pc, spv, spv_sizes, target_config);
97 }
98
99 VKAPI_ATTR VkResult VKAPI_CALL
vkCreateShaderModule(VkDevice _device,const VkShaderModuleCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkShaderModule * pShaderModule)100 vkCreateShaderModule(VkDevice _device, const VkShaderModuleCreateInfo *pCreateInfo,
101 const VkAllocationCallbacks *pAllocator, VkShaderModule *pShaderModule)
102 {
103 RADV_FROM_HANDLE(radv_device, device, _device);
104 return device->vk.dispatch_table.CreateShaderModule(_device, pCreateInfo, pAllocator, pShaderModule);
105 }
106
107 VKAPI_ATTR void VKAPI_CALL
vkDestroyShaderModule(VkDevice _device,VkShaderModule shaderModule,const VkAllocationCallbacks * pAllocator)108 vkDestroyShaderModule(VkDevice _device, VkShaderModule shaderModule, const VkAllocationCallbacks *pAllocator)
109 {
110 RADV_FROM_HANDLE(radv_device, device, _device);
111 device->vk.dispatch_table.DestroyShaderModule(_device, shaderModule, pAllocator);
112 }
113
114 VKAPI_ATTR VkResult VKAPI_CALL
vkCreatePipelineLayout(VkDevice _device,const VkPipelineLayoutCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineLayout * pPipelineLayout)115 vkCreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo,
116 const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout)
117 {
118 RADV_FROM_HANDLE(radv_device, device, _device);
119 return device->vk.dispatch_table.CreatePipelineLayout(_device, pCreateInfo, pAllocator, pPipelineLayout);
120 }
121
122 VKAPI_ATTR void VKAPI_CALL
vkDestroyPipelineLayout(VkDevice _device,VkPipelineLayout pipelineLayout,const VkAllocationCallbacks * pAllocator)123 vkDestroyPipelineLayout(VkDevice _device, VkPipelineLayout pipelineLayout, const VkAllocationCallbacks *pAllocator)
124 {
125 RADV_FROM_HANDLE(radv_device, device, _device);
126 device->vk.dispatch_table.DestroyPipelineLayout(_device, pipelineLayout, pAllocator);
127 }
128
129 VKAPI_ATTR VkResult VKAPI_CALL
vkCreateComputePipelines(VkDevice _device,VkPipelineCache pipelineCache,uint32_t createInfoCount,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)130 vkCreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t createInfoCount,
131 const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
132 VkPipeline *pPipelines)
133 {
134 RADV_FROM_HANDLE(radv_device, device, _device);
135 return device->vk.dispatch_table.CreateComputePipelines(_device, pipelineCache, createInfoCount, pCreateInfos,
136 pAllocator, pPipelines);
137 }
138
139 VKAPI_ATTR void VKAPI_CALL
vkDestroyPipeline(VkDevice _device,VkPipeline pipeline,const VkAllocationCallbacks * pAllocator)140 vkDestroyPipeline(VkDevice _device, VkPipeline pipeline, const VkAllocationCallbacks *pAllocator)
141 {
142 RADV_FROM_HANDLE(radv_device, device, _device);
143 device->vk.dispatch_table.DestroyPipeline(_device, pipeline, pAllocator);
144 }
145
146 VKAPI_ATTR void VKAPI_CALL
vkCmdPipelineBarrier(VkCommandBuffer commandBuffer,VkPipelineStageFlags srcStageMask,VkPipelineStageFlags dstStageMask,VkDependencyFlags dependencyFlags,uint32_t memoryBarrierCount,const VkMemoryBarrier * pMemoryBarriers,uint32_t bufferMemoryBarrierCount,const VkBufferMemoryBarrier * pBufferMemoryBarriers,uint32_t imageMemoryBarrierCount,const VkImageMemoryBarrier * pImageMemoryBarriers)147 vkCmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,
148 VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount,
149 const VkMemoryBarrier *pMemoryBarriers, uint32_t bufferMemoryBarrierCount,
150 const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount,
151 const VkImageMemoryBarrier *pImageMemoryBarriers)
152 {
153 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
154 cmd_buffer->device->vk.dispatch_table.CmdPipelineBarrier(
155 commandBuffer, srcStageMask, dstStageMask, dependencyFlags, memoryBarrierCount, pMemoryBarriers,
156 bufferMemoryBarrierCount, pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
157 }
158
159 VKAPI_ATTR void VKAPI_CALL
vkCmdPushConstants(VkCommandBuffer commandBuffer,VkPipelineLayout layout,VkShaderStageFlags stageFlags,uint32_t offset,uint32_t size,const void * pValues)160 vkCmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
161 uint32_t offset, uint32_t size, const void *pValues)
162 {
163 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
164 cmd_buffer->device->vk.dispatch_table.CmdPushConstants(commandBuffer, layout, stageFlags, offset, size, pValues);
165 }
166
167 VKAPI_ATTR void VKAPI_CALL
vkCmdBindPipeline(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipeline pipeline)168 vkCmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline)
169 {
170 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
171 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
172 }
173
174 VKAPI_ATTR void VKAPI_CALL
vkCmdDispatch(VkCommandBuffer commandBuffer,uint32_t groupCountX,uint32_t groupCountY,uint32_t groupCountZ)175 vkCmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
176 {
177 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
178 cmd_buffer->device->vk.dispatch_table.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
179 }
180
181 VKAPI_ATTR VkDeviceAddress VKAPI_CALL
vkGetBufferDeviceAddress(VkDevice _device,const VkBufferDeviceAddressInfo * pInfo)182 vkGetBufferDeviceAddress(VkDevice _device, const VkBufferDeviceAddressInfo *pInfo)
183 {
184 RADV_FROM_HANDLE(radv_device, device, _device);
185 return device->vk.dispatch_table.GetBufferDeviceAddress(_device, pInfo);
186 }
187
188 VKAPI_ATTR void VKAPI_CALL
vkCmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,uint32_t data)189 vkCmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size,
190 uint32_t data)
191 {
192 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
193 cmd_buffer->device->vk.dispatch_table.CmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data);
194 }
195
196 VKAPI_ATTR void VKAPI_CALL
vkCmdDispatchIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset)197 vkCmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
198 {
199 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
200 cmd_buffer->device->vk.dispatch_table.CmdDispatchIndirect(commandBuffer, buffer, offset);
201 }
202