Home
last modified time | relevance | path

Searched refs:shared_memory_size (Results 1 – 4 of 4) sorted by relevance

/external/tensorflow/tensorflow/core/kernels/sparse/
Dkernels_gpu.cu.cc248 const size_t shared_memory_size = sizeof(int) * (batch_size + 1); in operator ()() local
251 config.thread_per_block, shared_memory_size, d.stream(), in operator ()()
316 const size_t shared_memory_size = in CSRSparseMatrixBatchMulVecImpl() local
321 config.thread_per_block, shared_memory_size, d.stream(), a_values.data(), in CSRSparseMatrixBatchMulVecImpl()
454 const size_t shared_memory_size = sizeof(int) * (batch_size + 1); in CSRSparseMatrixSoftmaxGPUImpl() local
457 shared_memory_size, d.stream(), size, rows, in CSRSparseMatrixSoftmaxGPUImpl()
659 const size_t shared_memory_size = 2 * sizeof(int) * (batch_size + 1); in CSRSparseMatrixSoftmaxGradGPUImpl() local
662 config.thread_per_block, shared_memory_size, d.stream(), size, rows, in CSRSparseMatrixSoftmaxGradGPUImpl()
/external/tensorflow/tensorflow/core/kernels/
Dbias_op_gpu.cu.cc229 int32 shared_memory_size = 0; in compute() local
231 shared_memory_size = bias_size * sizeof(typename AccumulatorType<T>::type); in compute()
234 if (shared_memory_size <= max_shared_memory_size) { in compute()
238 shared_memory_size, d.stream(), total_count, in compute()
Dtopk_op_gpu.h384 constexpr auto shared_memory_size = 48 << 10; // 48 KB in LaunchTopKKernel() local
387 num_shards = shared_memory_size / heap_size - 1; in LaunchTopKKernel()
411 auto shared_memory_size = (num_shards + 1) * k * sizeof(Entry<T>); variable
414 shared_memory_size, stream, input, length, k,
Ddepthwise_conv_op_gpu.h660 const int shared_memory_size =
665 num_outputs, device, kernel, shared_memory_size,
668 shared_memory_size, device.stream(), args, input,
1589 const int shared_memory_size =
1591 if (shared_memory_size > device.sharedMemPerBlock()) {
1619 num_out_backprop, device, kernel, shared_memory_size,
1622 shared_memory_size, device.stream(), args,