Searched refs:thread_work_size (Results 1 – 12 of 12) sorted by relevance
/external/pytorch/aten/src/ATen/native/cuda/ |
D | thread_constants.h | 21 constexpr int thread_work_size() { return 4; } in thread_work_size() function 22 constexpr int block_work_size() { return thread_work_size() * num_threads(); } in block_work_size()
|
D | MemoryAccess.cuh | 208 for (int i = 0; i < thread_work_size(); i++) { in load() 223 for (int i = 0; i < thread_work_size(); i++) { in store() 243 …static_assert(thread_work_size() % vec_size == 0, "The workload per thread must be a multiple of v… 244 static constexpr int loop_size = thread_work_size() / vec_size; 315 for (int i = 0; i < thread_work_size(); i++) { in load() 331 for (int i = 0; i < thread_work_size(); i++) { in store()
|
D | RangeFactories.cu | 35 constexpr int thread_work_size = 1; variable 36 constexpr int block_work_size = thread_work_size * num_threads(); 42 for (int i = 0; i < thread_work_size; i++) { in C10_LAUNCH_BOUNDS_1()
|
D | Loops.cuh | 51 return_t results[thread_work_size()]; in elementwise_kernel_helper() 52 args_t args[thread_work_size()]; in elementwise_kernel_helper() 59 for (int i = 0; i < thread_work_size(); i++) { in elementwise_kernel_helper()
|
D | FunctionOfAMatrixUtilsKernel.cu | 90 _lauch_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in _compute_linear_combination_internal_kernel()
|
D | LinearAlgebra.cu | 138 _launch_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in unpack_pivots_cuda_kernel()
|
D | UnfoldBackwardKernel.cu | 118 _launch_unfold_backward_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in _unfold_backward_internal_kernel()
|
D | DistributionTemplates.h | 193 input_t_1 inputs_1[thread_work_size()]; in distribution_binary_elementwise_kernel() 194 input_t_2 inputs_2[thread_work_size()]; in distribution_binary_elementwise_kernel() 208 for (int i = 0; i < thread_work_size(); i++) { in distribution_binary_elementwise_kernel() 223 for (int i = 0; i < thread_work_size(); i++) { in distribution_binary_elementwise_kernel()
|
D | jit_utils.cpp | 968 constexpr int thread_work_size = THREAD_WORK_SIZE; variable 1017 << "[" << std::to_string(thread_work_size) << "];\n"; in generate_code() 1024 << "[" << std::to_string(thread_work_size) << "];\n"; in generate_code()
|
D | SparseBinaryOpIntersectionKernel.cu | 126 launch_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in binary_op_intersection_kernel()
|
D | ScatterGatherKernel.cu | 155 _launch_scatter_gather_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in operator ()() 377 _launch_scatter_gather_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in operator ()()
|
/external/pytorch/aten/src/ATen/test/ |
D | cuda_vectorized_test.cu | 79 …static_assert(vec_size <= thread_work_size() && thread_work_size() % vec_size == 0, "Invalid vec_s… in vectorized_copy() 87 scalar_t buf[thread_work_size()]; in vectorized_copy()
|