1 // Copyright 2021 The Fuchsia Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef SRC_GRAPHICS_LIB_COMPUTE_RADIX_SORT_PLATFORMS_VK_INCLUDE_RADIX_SORT_PLATFORMS_VK_RADIX_SORT_VK_H_ 6 #define SRC_GRAPHICS_LIB_COMPUTE_RADIX_SORT_PLATFORMS_VK_INCLUDE_RADIX_SORT_PLATFORMS_VK_RADIX_SORT_VK_H_ 7 8 // 9 // 10 // 11 12 #include <vulkan/vulkan_core.h> 13 14 // 15 // 16 // 17 18 #include <stdbool.h> 19 #include <stdint.h> 20 21 // 22 // 23 // 24 25 #include "target.h" 26 27 // 28 // Radix Sort Vk is a high-performance sorting library for Vulkan 1.2. 29 // 30 // The sorting function is both directly and indirectly dispatchable. 31 // 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 // 38 // Get a Radix Sort target's Vulkan requirements. 39 // 40 // A Radix Sort target is a binary image containing configuration parameters and 41 // a bundle of SPIR-V modules. 42 // 43 // Targets are prebuilt and specific to a particular device vendor, architecture 44 // and key-val configuration. 45 // 46 // A Radix Sort instance can only be created with a VkDevice that is initialized 47 // with all of the target's required extensions and features. 48 // 49 // The `radix_sort_vk_target_get_requirements()` function yields the extensions 50 // and initialized feature flags required by a Radix Sort target. 51 // 52 // These requirements can be merged with other Vulkan library requirements 53 // before VkDevice creation. 54 // 55 // If the `.ext_names` member is NULL, the `.ext_name_count` member will be 56 // initialized. 57 // 58 // Returns `false` if: 59 // 60 // * The .ext_names field is NULL and the number of required extensions is 61 // greater than zero. 62 // * The .ext_name_count is less than the number of required extensions is 63 // greater than zero. 64 // * Any of the .pdf, .pdf11 or .pdf12 members are NULL. 65 // 66 // Otherwise, returns true. 67 // 68 typedef struct radix_sort_vk_target radix_sort_vk_target_t; 69 70 // 71 // NOTE: The library currently supports uint32_t and uint64_t keyvals. 72 // 73 74 #define RS_KV_DWORDS_MAX 2 75 76 // 77 // 78 // 79 80 struct rs_pipeline_layout_scatter 81 { 82 VkPipelineLayout even; 83 VkPipelineLayout odd; 84 }; 85 86 struct rs_pipeline_scatter 87 { 88 VkPipeline even; 89 VkPipeline odd; 90 }; 91 92 // 93 // 94 // 95 96 struct rs_pipeline_layouts_named 97 { 98 VkPipelineLayout init; 99 VkPipelineLayout fill; 100 VkPipelineLayout histogram; 101 VkPipelineLayout prefix; 102 struct rs_pipeline_layout_scatter scatter[RS_KV_DWORDS_MAX]; 103 }; 104 105 struct rs_pipelines_named 106 { 107 VkPipeline init; 108 VkPipeline fill; 109 VkPipeline histogram; 110 VkPipeline prefix; 111 struct rs_pipeline_scatter scatter[RS_KV_DWORDS_MAX]; 112 }; 113 114 // clang-format off 115 #define RS_PIPELINE_LAYOUTS_HANDLES (sizeof(struct rs_pipeline_layouts_named) / sizeof(VkPipelineLayout)) 116 #define RS_PIPELINES_HANDLES (sizeof(struct rs_pipelines_named) / sizeof(VkPipeline)) 117 // clang-format on 118 119 // 120 // 121 // 122 123 struct radix_sort_vk 124 { 125 struct radix_sort_vk_target_config config; 126 127 union 128 { 129 struct rs_pipeline_layouts_named named; 130 VkPipelineLayout handles[RS_PIPELINE_LAYOUTS_HANDLES]; 131 } pipeline_layouts; 132 133 union 134 { 135 struct rs_pipelines_named named; 136 VkPipeline handles[RS_PIPELINES_HANDLES]; 137 } pipelines; 138 139 struct 140 { 141 struct 142 { 143 VkDeviceSize offset; 144 VkDeviceSize range; 145 } histograms; 146 147 struct 148 { 149 VkDeviceSize offset; 150 } partitions; 151 152 } internal; 153 }; 154 155 // 156 // Create a Radix Sort instance for a target.(VkCommandBuffer cb, 157 // 158 // Keyval size is implicitly determined by the target. 159 // 160 // Returns NULL on failure. 161 // 162 typedef struct radix_sort_vk radix_sort_vk_t; 163 164 // 165 // 166 // 167 radix_sort_vk_t * 168 radix_sort_vk_create(VkDevice device, 169 VkAllocationCallbacks const * ac, 170 VkPipelineCache pc, 171 const uint32_t* const* spv, 172 const uint32_t* spv_sizes, 173 struct radix_sort_vk_target_config config); 174 175 // 176 // Destroy the Radix Sort instance using the same device and allocator used at 177 // creation. 178 // 179 void 180 radix_sort_vk_destroy(radix_sort_vk_t * rs, // 181 VkDevice d, 182 VkAllocationCallbacks const * ac); 183 184 // 185 // Returns the buffer size and alignment requirements for a maximum number of 186 // keyvals. 187 // 188 // The radix sort implementation is not an in-place sorting algorithm so two 189 // non-overlapping keyval buffers are required that are at least 190 // `.keyvals_size`. 191 // 192 // The radix sort instance also requires an `internal` buffer during sorting. 193 // 194 // If the indirect dispatch sorting function is used, then an `indirect` buffer 195 // is also required. 196 // 197 // The alignment requirements for the keyval, internal, and indirect buffers 198 // must be honored. All alignments are power of 2. 199 // 200 // Input: 201 // count : Maximum number of keyvals 202 // 203 // Outputs: 204 // keyval_size : Size of a single keyval 205 // 206 // keyvals_size : Minimum size of the even and odd keyval buffers 207 // keyvals_alignment : Alignment of each keyval buffer 208 // 209 // internal_size : Minimum size of internal buffer 210 // internal_aligment : Alignment of the internal buffer 211 // 212 // indirect_size : Minimum size of indirect buffer 213 // indirect_aligment : Alignment of the indirect buffer 214 // 215 // .keyvals_even/odd 216 // ----------------- 217 // VK_BUFFER_USAGE_STORAGE_BUFFER_BIT 218 // VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT 219 // 220 // .internal 221 // --------- 222 // VK_BUFFER_USAGE_STORAGE_BUFFER_BIT 223 // VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT 224 // VK_BUFFER_USAGE_TRANSFER_DST_BIT ("direct" mode only) 225 // 226 // .indirect 227 // --------- 228 // VK_BUFFER_USAGE_STORAGE_BUFFER_BIT 229 // VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT 230 // VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT 231 // 232 typedef struct radix_sort_vk_memory_requirements 233 { 234 VkDeviceSize keyval_size; 235 236 VkDeviceSize keyvals_size; 237 VkDeviceSize keyvals_alignment; 238 239 VkDeviceSize internal_size; 240 VkDeviceSize internal_alignment; 241 242 VkDeviceSize indirect_size; 243 VkDeviceSize indirect_alignment; 244 } radix_sort_vk_memory_requirements_t; 245 246 void 247 radix_sort_vk_get_memory_requirements(radix_sort_vk_t const * rs, 248 uint32_t count, 249 radix_sort_vk_memory_requirements_t * mr); 250 251 // 252 // Direct dispatch sorting 253 // ----------------------- 254 // 255 // Using a key size of `key_bits`, sort `count` keyvals found in the 256 // `.devaddr_keyvals_even` buffer. 257 // 258 // Each internal sorting pass copies the keyvals from one keyvals buffer to the 259 // other. 260 // 261 // The number of internal sorting passes is determined by `.key_bits`. 262 // 263 // If an even number of internal sorting passes is required, the sorted keyvals 264 // will be found in the "even" keyvals buffer. Otherwise, the sorted keyvals 265 // will be found in the "odd" keyvals buffer. 266 // 267 // Which buffer has the sorted keyvals is returned in `keyvals_sorted`. 268 // 269 // A keyval's `key_bits` are the most significant bits of a keyval. 270 // 271 // The maximum number of key bits is determined by the keyval size. 272 // 273 // The keyval count must be less than (1 << 30) as well as be less than or equal 274 // to the count used to obtain the the memory requirements. 275 // 276 // The info struct's `ext` member must be NULL. 277 // 278 // This function appends push constants, dispatch commands, and barriers. 279 // 280 // Pipeline barriers should be applied as necessary, both before and after 281 // invoking this function. 282 // 283 // The sort begins with either a TRANSFER/WRITE or a COMPUTE/READ to the 284 // `internal` and `keyvals_even` buffers. 285 // 286 // The sort ends with a COMPUTE/WRITE to the `internal` and `keyvals_sorted` 287 // buffers. 288 // 289 290 // 291 // Direct dispatch sorting using VkDescriptorBufferInfo structures 292 // --------------------------------------------------------------- 293 // 294 typedef struct radix_sort_vk_sort_info 295 { 296 void * ext; 297 uint32_t key_bits; 298 uint32_t count; 299 VkDescriptorBufferInfo keyvals_even; 300 VkDescriptorBufferInfo keyvals_odd; 301 VkDescriptorBufferInfo internal; 302 } radix_sort_vk_sort_info_t; 303 304 void 305 radix_sort_vk_sort(radix_sort_vk_t const * rs, 306 radix_sort_vk_sort_info_t const * info, 307 VkDevice device, 308 VkCommandBuffer cb, 309 VkDescriptorBufferInfo * keyvals_sorted); 310 311 // 312 // Indirect dispatch sorting 313 // ------------------------- 314 // 315 // Using a key size of `key_bits`, at pipeline execution time, load keyvals 316 // count from `devaddr_count` and sorts the keyvals in `.devaddr_keyvals_even`. 317 // 318 // Each internal sorting pass copies the keyvals from one keyvals buffer to the 319 // other. 320 // 321 // The number of internal sorting passes is determined by `.key_bits`. 322 // 323 // If an even number of internal sorting passes is required, the sorted keyvals 324 // will be found in the "even" keyvals buffer. Otherwise, the sorted keyvals 325 // will be found in the "odd" keyvals buffer. 326 // 327 // Which buffer has the sorted keyvals is returned in `keyvals_sorted`. 328 // 329 // A keyval's `key_bits` are the most significant bits of a keyval. 330 // 331 // The keyval count must be less than (1 << 30) as well as be less than or equal 332 // to the count used to obtain the the memory requirements. 333 // 334 // The info struct's `ext` member must be NULL. 335 // 336 // This function appends push constants, dispatch commands, and barriers. 337 // 338 // Pipeline barriers should be applied as necessary, both before and after 339 // invoking this function. 340 // 341 // The indirect radix sort begins with a COMPUTE/READ from the `count` buffer 342 // and ends with a COMPUTE/WRITE to the `internal` and the `keyvals_sorted` 343 // buffers. 344 // 345 // The `indirect` buffer must support USAGE_INDIRECT. 346 // 347 // The `count` buffer must be at least 4 bytes and 4-byte aligned. 348 // 349 350 // 351 // Indirect dispatch sorting using VkDescriptorBufferInfo structures 352 // ----------------------------------------------------------------- 353 // 354 typedef struct radix_sort_vk_sort_indirect_info 355 { 356 void * ext; 357 uint32_t key_bits; 358 VkDescriptorBufferInfo count; 359 VkDescriptorBufferInfo keyvals_even; 360 VkDescriptorBufferInfo keyvals_odd; 361 VkDescriptorBufferInfo internal; 362 VkDescriptorBufferInfo indirect; 363 } radix_sort_vk_sort_indirect_info_t; 364 365 void 366 radix_sort_vk_sort_indirect(radix_sort_vk_t const * rs, 367 radix_sort_vk_sort_indirect_info_t const * info, 368 VkDevice device, 369 VkCommandBuffer cb, 370 VkDescriptorBufferInfo * keyvals_sorted); 371 372 // 373 // 374 // 375 376 #ifdef __cplusplus 377 } 378 #endif 379 380 // 381 // 382 // 383 384 #endif // SRC_GRAPHICS_LIB_COMPUTE_RADIX_SORT_PLATFORMS_VK_INCLUDE_RADIX_SORT_PLATFORMS_VK_RADIX_SORT_VK_H_ 385