1 /* 2 * Copyright 2018 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can 5 * be found in the LICENSE file. 6 * 7 */ 8 9 #pragma once 10 11 // 12 // 13 // 14 15 #include <cuda.h> 16 #include <stdint.h> 17 #include <stdbool.h> 18 19 // 20 // Info about the algorithm configuration. 21 // 22 23 void 24 hs_cuda_info_u32(uint32_t * const key_words, 25 uint32_t * const val_words, 26 uint32_t * const slab_height, 27 uint32_t * const slab_width_log2); 28 29 // 30 // Determine what padding will be applied to the input and output 31 // buffers. 32 // 33 // Always check to see if the allocated buffers are large enough. 34 // 35 // count : number of keys 36 // count + count_padded_in : additional keys required for sorting 37 // count + count_padded_out : additional keys required for merging 38 // 39 40 void 41 hs_cuda_pad_u32(uint32_t const count, 42 uint32_t * const count_padded_in, 43 uint32_t * const count_padded_out); 44 45 // 46 // Sort the keys in the vin buffer and store them in the vout buffer. 47 // 48 // If vout is NULL then the sort will be performed in place. 49 // 50 // The implementation assumes the command queue is out-of-order. 51 // 52 53 void 54 hs_cuda_sort_u32(uint32_t * const vin, 55 uint32_t * const vout, 56 uint32_t const count, 57 uint32_t const count_padded_in, 58 uint32_t const count_padded_out, 59 bool const linearize, 60 cudaStream_t stream0, // primary stream 61 cudaStream_t stream1, // auxilary streams 62 cudaStream_t stream2); // for concurrency 63 64 // 65 // 66 // 67