• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef SRC_GRAPHICS_LIB_COMPUTE_RADIX_SORT_PLATFORMS_VK_INCLUDE_RADIX_SORT_PLATFORMS_VK_RADIX_SORT_VK_H_
6 #define SRC_GRAPHICS_LIB_COMPUTE_RADIX_SORT_PLATFORMS_VK_INCLUDE_RADIX_SORT_PLATFORMS_VK_RADIX_SORT_VK_H_
7 
8 //
9 //
10 //
11 
12 #include <vulkan/vulkan_core.h>
13 
14 //
15 //
16 //
17 
18 #include <stdbool.h>
19 #include <stdint.h>
20 
21 //
22 //
23 //
24 
25 #include "target.h"
26 
27 //
28 // Radix Sort Vk is a high-performance sorting library for Vulkan 1.2.
29 //
30 // The sorting function is both directly and indirectly dispatchable.
31 //
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 //
38 // Get a Radix Sort target's Vulkan requirements.
39 //
40 // A Radix Sort target is a binary image containing configuration parameters and
41 // a bundle of SPIR-V modules.
42 //
43 // Targets are prebuilt and specific to a particular device vendor, architecture
44 // and key-val configuration.
45 //
46 // A Radix Sort instance can only be created with a VkDevice that is initialized
47 // with all of the target's required extensions and features.
48 //
49 // The `radix_sort_vk_target_get_requirements()` function yields the extensions
50 // and initialized feature flags required by a Radix Sort target.
51 //
52 // These requirements can be merged with other Vulkan library requirements
53 // before VkDevice creation.
54 //
55 // If the `.ext_names` member is NULL, the `.ext_name_count` member will be
56 // initialized.
57 //
58 // Returns `false` if:
59 //
60 //   * The .ext_names field is NULL and the number of required extensions is
61 //     greater than zero.
62 //   * The .ext_name_count is less than the number of required extensions is
63 //     greater than zero.
64 //   * Any of the .pdf, .pdf11 or .pdf12 members are NULL.
65 //
66 // Otherwise, returns true.
67 //
68 typedef struct radix_sort_vk_target radix_sort_vk_target_t;
69 
70 //
71 // NOTE: The library currently supports uint32_t and uint64_t keyvals.
72 //
73 
74 #define RS_KV_DWORDS_MAX 2
75 
76 //
77 //
78 //
79 
80 struct rs_pipeline_layout_scatter
81 {
82   VkPipelineLayout even;
83   VkPipelineLayout odd;
84 };
85 
86 struct rs_pipeline_scatter
87 {
88   VkPipeline even;
89   VkPipeline odd;
90 };
91 
92 //
93 //
94 //
95 
96 struct rs_pipeline_layouts_named
97 {
98   VkPipelineLayout                  init;
99   VkPipelineLayout                  fill;
100   VkPipelineLayout                  histogram;
101   VkPipelineLayout                  prefix;
102   struct rs_pipeline_layout_scatter scatter[RS_KV_DWORDS_MAX];
103 };
104 
105 struct rs_pipelines_named
106 {
107   VkPipeline                 init;
108   VkPipeline                 fill;
109   VkPipeline                 histogram;
110   VkPipeline                 prefix;
111   struct rs_pipeline_scatter scatter[RS_KV_DWORDS_MAX];
112 };
113 
114 // clang-format off
115 #define RS_PIPELINE_LAYOUTS_HANDLES (sizeof(struct rs_pipeline_layouts_named) / sizeof(VkPipelineLayout))
116 #define RS_PIPELINES_HANDLES        (sizeof(struct rs_pipelines_named)        / sizeof(VkPipeline))
117 // clang-format on
118 
119 //
120 //
121 //
122 
123 struct radix_sort_vk
124 {
125   struct radix_sort_vk_target_config config;
126 
127   union
128   {
129     struct rs_pipeline_layouts_named named;
130     VkPipelineLayout                 handles[RS_PIPELINE_LAYOUTS_HANDLES];
131   } pipeline_layouts;
132 
133   union
134   {
135     struct rs_pipelines_named named;
136     VkPipeline                handles[RS_PIPELINES_HANDLES];
137   } pipelines;
138 
139   struct
140   {
141     struct
142     {
143       VkDeviceSize offset;
144       VkDeviceSize range;
145     } histograms;
146 
147     struct
148     {
149       VkDeviceSize offset;
150     } partitions;
151 
152   } internal;
153 };
154 
155 //
156 // Create a Radix Sort instance for a target.(VkCommandBuffer                     cb,
157 //
158 // Keyval size is implicitly determined by the target.
159 //
160 // Returns NULL on failure.
161 //
162 typedef struct radix_sort_vk radix_sort_vk_t;
163 
164 //
165 //
166 //
167 radix_sort_vk_t *
168 radix_sort_vk_create(VkDevice                           device,
169                      VkAllocationCallbacks const *      ac,
170                      VkPipelineCache                    pc,
171                      const uint32_t* const*             spv,
172                      const uint32_t*                    spv_sizes,
173                      struct radix_sort_vk_target_config config);
174 
175 //
176 // Destroy the Radix Sort instance using the same device and allocator used at
177 // creation.
178 //
179 void
180 radix_sort_vk_destroy(radix_sort_vk_t *             rs,  //
181                       VkDevice                      d,
182                       VkAllocationCallbacks const * ac);
183 
184 //
185 // Returns the buffer size and alignment requirements for a maximum number of
186 // keyvals.
187 //
188 // The radix sort implementation is not an in-place sorting algorithm so two
189 // non-overlapping keyval buffers are required that are at least
190 // `.keyvals_size`.
191 //
192 // The radix sort instance also requires an `internal` buffer during sorting.
193 //
194 // If the indirect dispatch sorting function is used, then an `indirect` buffer
195 // is also required.
196 //
197 // The alignment requirements for the keyval, internal, and indirect buffers
198 // must be honored.  All alignments are power of 2.
199 //
200 //   Input:
201 //     count             : Maximum number of keyvals
202 //
203 //   Outputs:
204 //     keyval_size       : Size of a single keyval
205 //
206 //     keyvals_size      : Minimum size of the even and odd keyval buffers
207 //     keyvals_alignment : Alignment of each keyval buffer
208 //
209 //     internal_size     : Minimum size of internal buffer
210 //     internal_aligment : Alignment of the internal buffer
211 //
212 //     indirect_size     : Minimum size of indirect buffer
213 //     indirect_aligment : Alignment of the indirect buffer
214 //
215 //   .keyvals_even/odd
216 //   -----------------
217 //   VK_BUFFER_USAGE_STORAGE_BUFFER_BIT
218 //   VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
219 //
220 //   .internal
221 //   ---------
222 //   VK_BUFFER_USAGE_STORAGE_BUFFER_BIT
223 //   VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
224 //   VK_BUFFER_USAGE_TRANSFER_DST_BIT ("direct" mode only)
225 //
226 //   .indirect
227 //   ---------
228 //   VK_BUFFER_USAGE_STORAGE_BUFFER_BIT
229 //   VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
230 //   VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT
231 //
232 typedef struct radix_sort_vk_memory_requirements
233 {
234   VkDeviceSize keyval_size;
235 
236   VkDeviceSize keyvals_size;
237   VkDeviceSize keyvals_alignment;
238 
239   VkDeviceSize internal_size;
240   VkDeviceSize internal_alignment;
241 
242   VkDeviceSize indirect_size;
243   VkDeviceSize indirect_alignment;
244 } radix_sort_vk_memory_requirements_t;
245 
246 void
247 radix_sort_vk_get_memory_requirements(radix_sort_vk_t const *               rs,
248                                       uint32_t                              count,
249                                       radix_sort_vk_memory_requirements_t * mr);
250 
251 //
252 // Direct dispatch sorting
253 // -----------------------
254 //
255 // Using a key size of `key_bits`, sort `count` keyvals found in the
256 // `.devaddr_keyvals_even` buffer.
257 //
258 // Each internal sorting pass copies the keyvals from one keyvals buffer to the
259 // other.
260 //
261 // The number of internal sorting passes is determined by `.key_bits`.
262 //
263 // If an even number of internal sorting passes is required, the sorted keyvals
264 // will be found in the "even" keyvals buffer.  Otherwise, the sorted keyvals
265 // will be found in the "odd" keyvals buffer.
266 //
267 // Which buffer has the sorted keyvals is returned in `keyvals_sorted`.
268 //
269 // A keyval's `key_bits` are the most significant bits of a keyval.
270 //
271 // The maximum number of key bits is determined by the keyval size.
272 //
273 // The keyval count must be less than (1 << 30) as well as be less than or equal
274 // to the count used to obtain the the memory requirements.
275 //
276 // The info struct's `ext` member must be NULL.
277 //
278 // This function appends push constants, dispatch commands, and barriers.
279 //
280 // Pipeline barriers should be applied as necessary, both before and after
281 // invoking this function.
282 //
283 // The sort begins with either a TRANSFER/WRITE or a COMPUTE/READ to the
284 // `internal` and `keyvals_even` buffers.
285 //
286 // The sort ends with a COMPUTE/WRITE to the `internal` and `keyvals_sorted`
287 // buffers.
288 //
289 
290 //
291 // Direct dispatch sorting using VkDescriptorBufferInfo structures
292 // ---------------------------------------------------------------
293 //
294 typedef struct radix_sort_vk_sort_info
295 {
296   void *                 ext;
297   uint32_t               key_bits;
298   uint32_t               count;
299   VkDescriptorBufferInfo keyvals_even;
300   VkDescriptorBufferInfo keyvals_odd;
301   VkDescriptorBufferInfo internal;
302 } radix_sort_vk_sort_info_t;
303 
304 void
305 radix_sort_vk_sort(radix_sort_vk_t const *           rs,
306                    radix_sort_vk_sort_info_t const * info,
307                    VkDevice                          device,
308                    VkCommandBuffer                   cb,
309                    VkDescriptorBufferInfo *          keyvals_sorted);
310 
311 //
312 // Indirect dispatch sorting
313 // -------------------------
314 //
315 // Using a key size of `key_bits`, at pipeline execution time, load keyvals
316 // count from `devaddr_count` and sorts the keyvals in `.devaddr_keyvals_even`.
317 //
318 // Each internal sorting pass copies the keyvals from one keyvals buffer to the
319 // other.
320 //
321 // The number of internal sorting passes is determined by `.key_bits`.
322 //
323 // If an even number of internal sorting passes is required, the sorted keyvals
324 // will be found in the "even" keyvals buffer.  Otherwise, the sorted keyvals
325 // will be found in the "odd" keyvals buffer.
326 //
327 // Which buffer has the sorted keyvals is returned in `keyvals_sorted`.
328 //
329 // A keyval's `key_bits` are the most significant bits of a keyval.
330 //
331 // The keyval count must be less than (1 << 30) as well as be less than or equal
332 // to the count used to obtain the the memory requirements.
333 //
334 // The info struct's `ext` member must be NULL.
335 //
336 // This function appends push constants, dispatch commands, and barriers.
337 //
338 // Pipeline barriers should be applied as necessary, both before and after
339 // invoking this function.
340 //
341 // The indirect radix sort begins with a COMPUTE/READ from the `count` buffer
342 // and ends with a COMPUTE/WRITE to the `internal` and the `keyvals_sorted`
343 // buffers.
344 //
345 // The `indirect` buffer must support USAGE_INDIRECT.
346 //
347 // The `count` buffer must be at least 4 bytes and 4-byte aligned.
348 //
349 
350 //
351 // Indirect dispatch sorting using VkDescriptorBufferInfo structures
352 // -----------------------------------------------------------------
353 //
354 typedef struct radix_sort_vk_sort_indirect_info
355 {
356   void *                 ext;
357   uint32_t               key_bits;
358   VkDescriptorBufferInfo count;
359   VkDescriptorBufferInfo keyvals_even;
360   VkDescriptorBufferInfo keyvals_odd;
361   VkDescriptorBufferInfo internal;
362   VkDescriptorBufferInfo indirect;
363 } radix_sort_vk_sort_indirect_info_t;
364 
365 void
366 radix_sort_vk_sort_indirect(radix_sort_vk_t const *                    rs,
367                             radix_sort_vk_sort_indirect_info_t const * info,
368                             VkDevice                                   device,
369                             VkCommandBuffer                            cb,
370                             VkDescriptorBufferInfo *                   keyvals_sorted);
371 
372 //
373 //
374 //
375 
376 #ifdef __cplusplus
377 }
378 #endif
379 
380 //
381 //
382 //
383 
384 #endif  // SRC_GRAPHICS_LIB_COMPUTE_RADIX_SORT_PLATFORMS_VK_INCLUDE_RADIX_SORT_PLATFORMS_VK_RADIX_SORT_VK_H_
385