Searched refs:output_calc (Results 1 – 3 of 3) sorted by relevance
/external/pytorch/aten/src/ATen/native/cuda/ |
D | CUDALoops.cuh | 65 auto output_calc = TrivialOffsetCalculator<1>(); in C10_LAUNCH_BOUNDS_1() local 71 decltype(output_calc), in C10_LAUNCH_BOUNDS_1() 74 data, remaining, input_calc, output_calc, loader, storer); in C10_LAUNCH_BOUNDS_1() 131 auto output_calc = TrivialOffsetCalculator<1>(); in launch_vectorized_kernel() local 136 N, f, data, input_calc, output_calc, loader, storer); in launch_vectorized_kernel()
|
D | Reduce.cuh | 296 OutputCalculator output_calc; member 313 OutputCalculator output_calc, in ReduceJitOp() 326 output_calc(output_calc), in ReduceJitOp() 358 OutputCalculator output_calc; member 376 OutputCalculator output_calc, in ReduceOp() 390 output_calc(output_calc), in ReduceOp() 408 auto base_offsets1 = output_calc.get(output_idx)[1]; in run() 432 base_offsets[i] = output_calc.get(output_idx + i)[0]; in run() 795 base_offsets[i] = output_calc.get(output_idx + i)[0]; in global_reduce() 1214 auto output_calc = make_output_calculator<uint32_t>(iter); in gpu_reduce_kernel() local [all …]
|
D | Loops.cuh | 294 auto output_calc = TrivialOffsetCalculator<num_outputs>(); in gpu_kernel_multiple_outputs_impl() local 295 launch_unrolled_kernel_for_multi_outputs<num_outputs>(numel, f, data, input_calc, output_calc); in gpu_kernel_multiple_outputs_impl() 298 auto output_calc = make_output_offset_calculator<num_outputs>(iter); in gpu_kernel_multiple_outputs_impl() local 299 launch_unrolled_kernel_for_multi_outputs<num_outputs>(numel, f, data, input_calc, output_calc); in gpu_kernel_multiple_outputs_impl()
|