Home
last modified time | relevance | path

Searched refs:n_elems_per_thread (Results 1 – 3 of 3) sorted by relevance

/external/pytorch/aten/src/ATen/native/cuda/
FunctionOfAMatrixUtilsKernel.cu
14 template <int n_threads, int n_elems_per_thread, typename func_t>
15 C10_LAUNCH_BOUNDS_2(n_threads, n_elems_per_thread) in C10_LAUNCH_BOUNDS_2() argument
17 constexpr int total_work_block = n_threads * n_elems_per_thread; in C10_LAUNCH_BOUNDS_2()
21 for (int i = 0; i < n_elems_per_thread; ++i) { in C10_LAUNCH_BOUNDS_2()
29 template <int n_threads, int n_elems_per_thread, typename func_t>
36 constexpr int total_work_block = n_threads * n_elems_per_thread; in _lauch_kernel()
40 _elemwise_kernel<n_threads, n_elems_per_thread, func_t> in _lauch_kernel()
LinearAlgebra.cu
75 template <int n_threads, int n_elems_per_thread, typename func_t>
76 C10_LAUNCH_BOUNDS_2(n_threads, n_elems_per_thread) in C10_LAUNCH_BOUNDS_2() argument
78 constexpr int total_work_block = n_threads * n_elems_per_thread; in C10_LAUNCH_BOUNDS_2()
82 for (int i = 0; i < n_elems_per_thread; ++i) { in C10_LAUNCH_BOUNDS_2()
90 template <int n_threads, int n_elems_per_thread, typename func_t>
97 constexpr int total_work_block = n_threads * n_elems_per_thread; in _launch_kernel()
101 _elementwise_kernel<n_threads, n_elems_per_thread, func_t> in _launch_kernel()
UnfoldBackwardKernel.cu
22 template <int n_threads, int n_elems_per_thread, typename func_t>
23 C10_LAUNCH_BOUNDS_2(n_threads, n_elems_per_thread) in C10_LAUNCH_BOUNDS_2() argument
25 constexpr int total_work_block = n_threads * n_elems_per_thread; in C10_LAUNCH_BOUNDS_2()
29 for (int i = 0; i < n_elems_per_thread; ++i) { in C10_LAUNCH_BOUNDS_2()
37 template <int n_threads, int n_elems_per_thread, typename func_t>
44 constexpr int total_work_block = n_threads * n_elems_per_thread; in _launch_unfold_backward_kernel()
48 _unfold_backward_elementwise_kernel<n_threads, n_elems_per_thread, func_t> in _launch_unfold_backward_kernel()