Home
last modified time | relevance | path

Searched refs:output_calc (Results 1 – 3 of 3) sorted by relevance

/external/pytorch/aten/src/ATen/native/cuda/
CUDALoops.cuh
65 auto output_calc = TrivialOffsetCalculator<1>(); in C10_LAUNCH_BOUNDS_1() local
71 decltype(output_calc), in C10_LAUNCH_BOUNDS_1()
74 data, remaining, input_calc, output_calc, loader, storer); in C10_LAUNCH_BOUNDS_1()
131 auto output_calc = TrivialOffsetCalculator<1>(); in launch_vectorized_kernel() local
136 N, f, data, input_calc, output_calc, loader, storer); in launch_vectorized_kernel()
Reduce.cuh
296 OutputCalculator output_calc; member
313 OutputCalculator output_calc, in ReduceJitOp()
326 output_calc(output_calc), in ReduceJitOp()
358 OutputCalculator output_calc; member
376 OutputCalculator output_calc, in ReduceOp()
390 output_calc(output_calc), in ReduceOp()
408 auto base_offsets1 = output_calc.get(output_idx)[1]; in run()
432 base_offsets[i] = output_calc.get(output_idx + i)[0]; in run()
795 base_offsets[i] = output_calc.get(output_idx + i)[0]; in global_reduce()
1214 auto output_calc = make_output_calculator<uint32_t>(iter); in gpu_reduce_kernel() local
[all …]
Loops.cuh
294 auto output_calc = TrivialOffsetCalculator<num_outputs>(); in gpu_kernel_multiple_outputs_impl() local
295 launch_unrolled_kernel_for_multi_outputs<num_outputs>(numel, f, data, input_calc, output_calc); in gpu_kernel_multiple_outputs_impl()
298 auto output_calc = make_output_offset_calculator<num_outputs>(iter); in gpu_kernel_multiple_outputs_impl() local
299 launch_unrolled_kernel_for_multi_outputs<num_outputs>(numel, f, data, input_calc, output_calc); in gpu_kernel_multiple_outputs_impl()