/* * Copyright 2018 Google Inc. * * Use of this source code is governed by a BSD-style license that can * be found in the LICENSE file. * */ #pragma once // // // #include #include #include // // Info about the algorithm configuration. // void hs_cuda_info_u64(uint32_t * const key_words, uint32_t * const val_words, uint32_t * const slab_height, uint32_t * const slab_width_log2); // // Determine what padding will be applied to the input and output // buffers. // // Always check to see if the allocated buffers are large enough. // // count : number of keys // count + count_padded_in : additional keys required for sorting // count + count_padded_out : additional keys required for merging // void hs_cuda_pad_u64(uint32_t const count, uint32_t * const count_padded_in, uint32_t * const count_padded_out); // // Sort the keys in the vin buffer and store them in the vout buffer. // // If vout is NULL then the sort will be performed in place. // // The implementation assumes the command queue is out-of-order. // void hs_cuda_sort_u64(uint64_t * const vin, uint64_t * const vout, uint32_t const count, uint32_t const count_padded_in, uint32_t const count_padded_out, bool const linearize, cudaStream_t stream0, // primary stream cudaStream_t stream1, // auxilary streams cudaStream_t stream2); // for concurrency // // //