Home
last modified time | relevance | path

Searched refs:thread_work_size (Results 1 – 12 of 12) sorted by relevance

/external/pytorch/aten/src/ATen/native/cuda/
Dthread_constants.h21 constexpr int thread_work_size() { return 4; } in thread_work_size() function
22 constexpr int block_work_size() { return thread_work_size() * num_threads(); } in block_work_size()
DMemoryAccess.cuh208 for (int i = 0; i < thread_work_size(); i++) { in load()
223 for (int i = 0; i < thread_work_size(); i++) { in store()
243 …static_assert(thread_work_size() % vec_size == 0, "The workload per thread must be a multiple of v…
244 static constexpr int loop_size = thread_work_size() / vec_size;
315 for (int i = 0; i < thread_work_size(); i++) { in load()
331 for (int i = 0; i < thread_work_size(); i++) { in store()
DRangeFactories.cu35 constexpr int thread_work_size = 1; variable
36 constexpr int block_work_size = thread_work_size * num_threads();
42 for (int i = 0; i < thread_work_size; i++) { in C10_LAUNCH_BOUNDS_1()
DLoops.cuh51 return_t results[thread_work_size()]; in elementwise_kernel_helper()
52 args_t args[thread_work_size()]; in elementwise_kernel_helper()
59 for (int i = 0; i < thread_work_size(); i++) { in elementwise_kernel_helper()
DFunctionOfAMatrixUtilsKernel.cu90 _lauch_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in _compute_linear_combination_internal_kernel()
DLinearAlgebra.cu138 _launch_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in unpack_pivots_cuda_kernel()
DUnfoldBackwardKernel.cu118 _launch_unfold_backward_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in _unfold_backward_internal_kernel()
DDistributionTemplates.h193 input_t_1 inputs_1[thread_work_size()]; in distribution_binary_elementwise_kernel()
194 input_t_2 inputs_2[thread_work_size()]; in distribution_binary_elementwise_kernel()
208 for (int i = 0; i < thread_work_size(); i++) { in distribution_binary_elementwise_kernel()
223 for (int i = 0; i < thread_work_size(); i++) { in distribution_binary_elementwise_kernel()
Djit_utils.cpp968 constexpr int thread_work_size = THREAD_WORK_SIZE; variable
1017 << "[" << std::to_string(thread_work_size) << "];\n"; in generate_code()
1024 << "[" << std::to_string(thread_work_size) << "];\n"; in generate_code()
DSparseBinaryOpIntersectionKernel.cu126 launch_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in binary_op_intersection_kernel()
DScatterGatherKernel.cu155 _launch_scatter_gather_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in operator ()()
377 _launch_scatter_gather_kernel<num_threads(), thread_work_size()>(iter.numel(), loop); in operator ()()
/external/pytorch/aten/src/ATen/test/
Dcuda_vectorized_test.cu79 …static_assert(vec_size <= thread_work_size() && thread_work_size() % vec_size == 0, "Invalid vec_s… in vectorized_copy()
87 scalar_t buf[thread_work_size()]; in vectorized_copy()