Searched refs:n_elems_per_thread (Results 1 – 3 of 3) sorted by relevance
| /external/pytorch/aten/src/ATen/native/cuda/ |
| D | FunctionOfAMatrixUtilsKernel.cu | 14 template <int n_threads, int n_elems_per_thread, typename func_t> 15 C10_LAUNCH_BOUNDS_2(n_threads, n_elems_per_thread) in C10_LAUNCH_BOUNDS_2() argument 17 constexpr int total_work_block = n_threads * n_elems_per_thread; in C10_LAUNCH_BOUNDS_2() 21 for (int i = 0; i < n_elems_per_thread; ++i) { in C10_LAUNCH_BOUNDS_2() 29 template <int n_threads, int n_elems_per_thread, typename func_t> 36 constexpr int total_work_block = n_threads * n_elems_per_thread; in _lauch_kernel() 40 _elemwise_kernel<n_threads, n_elems_per_thread, func_t> in _lauch_kernel()
|
| D | LinearAlgebra.cu | 75 template <int n_threads, int n_elems_per_thread, typename func_t> 76 C10_LAUNCH_BOUNDS_2(n_threads, n_elems_per_thread) in C10_LAUNCH_BOUNDS_2() argument 78 constexpr int total_work_block = n_threads * n_elems_per_thread; in C10_LAUNCH_BOUNDS_2() 82 for (int i = 0; i < n_elems_per_thread; ++i) { in C10_LAUNCH_BOUNDS_2() 90 template <int n_threads, int n_elems_per_thread, typename func_t> 97 constexpr int total_work_block = n_threads * n_elems_per_thread; in _launch_kernel() 101 _elementwise_kernel<n_threads, n_elems_per_thread, func_t> in _launch_kernel()
|
| D | UnfoldBackwardKernel.cu | 22 template <int n_threads, int n_elems_per_thread, typename func_t> 23 C10_LAUNCH_BOUNDS_2(n_threads, n_elems_per_thread) in C10_LAUNCH_BOUNDS_2() argument 25 constexpr int total_work_block = n_threads * n_elems_per_thread; in C10_LAUNCH_BOUNDS_2() 29 for (int i = 0; i < n_elems_per_thread; ++i) { in C10_LAUNCH_BOUNDS_2() 37 template <int n_threads, int n_elems_per_thread, typename func_t> 44 constexpr int total_work_block = n_threads * n_elems_per_thread; in _launch_unfold_backward_kernel() 48 _unfold_backward_elementwise_kernel<n_threads, n_elems_per_thread, func_t> in _launch_unfold_backward_kernel()
|