/external/pytorch/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernels/ |
D | cutlassB_bf16_aligned_k128.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 128, 128, 128… in __launch_bounds__() 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 128, 128, 128… in __launch_bounds__() 50 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, false, 64, 64, 128,… in __launch_bounds__() 69 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, false, 64, 64, 128>… in __launch_bounds__()
|
D | cutlassF_bf16_aligned.cu | 12 …AttentionKernel<cutlass::bfloat16_t, cutlass::arch::Sm80, true, 64, 64, 64, true, true>::kNumThrea… in __launch_bounds__() 31 …AttentionKernel<cutlass::bfloat16_t, cutlass::arch::Sm80, true, 64, 128, 128, true, true>::kNumThr… in __launch_bounds__() 50 …AttentionKernel<cutlass::bfloat16_t, cutlass::arch::Sm80, true, 32, 128, 65536, true, true>::kNumT… in __launch_bounds__()
|
D | cutlassB_bf16_aligned_k65536_dropout.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, true, false, 128, 64, 6553… in __launch_bounds__() 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, true, false, 64, 64, 65536… in __launch_bounds__()
|
D | cutlassB_bf16_aligned_k65536.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, false, 128, 64, 655… in __launch_bounds__() 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, false, 64, 64, 6553… in __launch_bounds__()
|
D | cutlassB_bf16_aligned_k128_dropout.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, true, true, 128, 128, 128>… in __launch_bounds__() 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, true, false, 64, 64, 128>:… in __launch_bounds__()
|
D | cutlassB_bf16_aligned_k32.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 64, 64, 32, t… in __launch_bounds__() 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 64, 64, 32>::… in __launch_bounds__()
|
D | cutlassB_bf16_aligned_k64.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 64, 64, 64, t… in __launch_bounds__() 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 64, 64, 64>::… in __launch_bounds__()
|
D | cutlassB_bf16_aligned_k96.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 128, 64, 96>:… in __launch_bounds__()
|
D | cutlassB_bf16_aligned_k64_dropout.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, true, true, 64, 64, 64>::k… in __launch_bounds__()
|
D | cutlassB_f16_aligned_k96.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 128, 64, 96>::kNu… in __launch_bounds__()
|
D | cutlassB_bf16_aligned_k32_dropout.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, true, true, 64, 64, 32>::k… in __launch_bounds__()
|
D | cutlassB_f16_aligned_k128.cu | 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 128, 128, 128, tr… in __launch_bounds__() 88 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 128, 128, 128>::k… in __launch_bounds__() 126 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, false, 64, 64, 128, tru… in __launch_bounds__() 202 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, false, 64, 64, 128>::kN… in __launch_bounds__()
|
D | cutlassB_f32_aligned_k128_dropout.cu | 12 … AttentionBackwardKernel<cutlass::arch::Sm80, float, true, true, false, 128, 64, 128>::kNumThreads, in __launch_bounds__() 88 … AttentionBackwardKernel<cutlass::arch::Sm80, float, true, true, false, 64, 64, 128>::kNumThreads, in __launch_bounds__()
|
D | cutlassB_f32_aligned_k128.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, float, true, false, false, 128, 64, 128>::kNumThreads, in __launch_bounds__() 88 … AttentionBackwardKernel<cutlass::arch::Sm80, float, true, false, false, 64, 64, 128>::kNumThreads, in __launch_bounds__()
|
D | cutlassB_f32_aligned_k65536.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, float, true, false, false, 128, 64, 65536>::kNumThrea… in __launch_bounds__() 88 …AttentionBackwardKernel<cutlass::arch::Sm80, float, true, false, false, 64, 64, 65536>::kNumThread… in __launch_bounds__()
|
D | cutlassB_f32_aligned_k65536_dropout.cu | 12 …AttentionBackwardKernel<cutlass::arch::Sm80, float, true, true, false, 128, 64, 65536>::kNumThread… in __launch_bounds__() 88 …AttentionBackwardKernel<cutlass::arch::Sm80, float, true, true, false, 64, 64, 65536>::kNumThreads, in __launch_bounds__()
|
D | cutlassB_f16_aligned_k32.cu | 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 64, 64, 32, true>… in __launch_bounds__() 107 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 64, 64, 32>::kNum… in __launch_bounds__()
|
D | cutlassB_f16_aligned_k64.cu | 31 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 64, 64, 64, true>… in __launch_bounds__() 107 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 64, 64, 64>::kNum… in __launch_bounds__()
|
D | cutlassB_f16_aligned_k65536_dropout.cu | 50 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, true, false, 128, 64, 65536>::… in __launch_bounds__() 126 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, true, false, 64, 64, 65536>::k… in __launch_bounds__()
|
D | cutlassB_f16_aligned_k128_dropout.cu | 50 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, true, true, 128, 128, 128>::kN… in __launch_bounds__() 126 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, true, false, 64, 64, 128>::kNu… in __launch_bounds__()
|
D | cutlassB_f16_aligned_k65536.cu | 50 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, false, 128, 64, 65536>:… in __launch_bounds__() 126 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, false, 64, 64, 65536>::… in __launch_bounds__()
|
D | cutlassF_f16_aligned.cu | 69 … AttentionKernel<cutlass::half_t, cutlass::arch::Sm80, true, 64, 64, 64, true, true>::kNumThreads, in __launch_bounds__() 145 …AttentionKernel<cutlass::half_t, cutlass::arch::Sm80, true, 64, 128, 128, true, true>::kNumThreads, in __launch_bounds__() 221 …AttentionKernel<cutlass::half_t, cutlass::arch::Sm80, true, 32, 128, 65536, true, true>::kNumThrea… in __launch_bounds__()
|
D | cutlassF_f32_aligned.cu | 69 AttentionKernel<float, cutlass::arch::Sm80, true, 64, 64, 64, true, true>::kNumThreads, in __launch_bounds__() 145 AttentionKernel<float, cutlass::arch::Sm80, true, 64, 128, 128, true, true>::kNumThreads, in __launch_bounds__() 221 AttentionKernel<float, cutlass::arch::Sm80, true, 32, 128, 65536, true, true>::kNumThreads, in __launch_bounds__()
|
D | cutlassB_f16_aligned_k32_dropout.cu | 69 …AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, true, true, 64, 64, 32>::kNumT… in __launch_bounds__()
|
D | cutlassB_f32_aligned_k64.cu | 69 … AttentionBackwardKernel<cutlass::arch::Sm80, float, true, false, false, 64, 64, 64>::kNumThreads, in __launch_bounds__()
|