Searched refs:aligned_vector (Results 1 – 8 of 8) sorted by relevance

/external/pytorch/aten/src/ATen/native/cuda/
MemoryAccess.cuh
160 struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { struct
165 __device__ aligned_vector<scalar_t, vec_size> load_vector(const scalar_t *base_ptr, uint32_t offset… in load_vector()
166 using vec_t = aligned_vector<scalar_t, vec_size>; in load_vector()
172 __device__ aligned_vector<bool, vec_size> load_vector(const bool *base_ptr, uint32_t offset) { in load_vector()
175 aligned_vector<bool, vec_size> ret; in load_vector()
276 using vec_t = aligned_vector<scalar_t, vec_size>; in store()
351 constexpr int vec2_alignment = std::alignment_of<aligned_vector<scalar_t, 2>>::value; in can_vectorize_up_to()
352 constexpr int vec4_alignment = std::alignment_of<aligned_vector<scalar_t, 4>>::value; in can_vectorize_up_to()
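
The MemoryAccess.cuh hits above are the core of the pattern: a wrapper struct whose alignas equals the full vector width, a load helper that moves vec_size elements in one transaction, and an alignment probe that picks the widest vector a runtime pointer supports. A minimal sketch of that pattern follows; the val member name, the omitted bool specialization, and the exact signatures are assumptions, not the verbatim PyTorch code.

#include <cstdint>
#include <type_traits>

// Alignment of the whole struct equals sizeof(scalar_t) * vec_size, so one
// aligned vector-wide load/store can move all elements at once.
template <typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector {
  scalar_t val[vec_size];
};

// Vectorized load: reinterpret the base pointer as an array of aligned
// vectors and read one whole vector per call (cf. load_vector() at 165).
template <int vec_size, typename scalar_t>
__device__ aligned_vector<scalar_t, vec_size> load_vector(
    const scalar_t* base_ptr, uint32_t offset) {
  using vec_t = aligned_vector<scalar_t, vec_size>;
  const vec_t* from = reinterpret_cast<const vec_t*>(base_ptr);
  return from[offset];
}

// Runtime probe in the spirit of can_vectorize_up_to() (351-352): return the
// widest width (4, 2, or 1) whose alignment the pointer satisfies.
template <typename scalar_t>
int can_vectorize_up_to(const char* pointer) {
  uint64_t address = reinterpret_cast<uint64_t>(pointer);
  constexpr int vec2_alignment = std::alignment_of<aligned_vector<scalar_t, 2>>::value;
  constexpr int vec4_alignment = std::alignment_of<aligned_vector<scalar_t, 4>>::value;
  if (address % vec4_alignment == 0) return 4;
  if (address % vec2_alignment == 0) return 2;
  return 1;
}
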
SoftMax.cu
460 using LoadT = at::native::memory::aligned_vector<T, ILP>; in ilpReduce()
507 using LoadT = at::native::memory::aligned_vector<scalar_t, ILP>; in WriteFpropResultsVectorized()
508 using StoreT = at::native::memory::aligned_vector<outscalar_t, ILP>; in WriteFpropResultsVectorized()
561 using gradInputT = at::native::memory::aligned_vector<scalar_t, ILP>; in WriteBpropResultsVectorized()
562 using outputT = at::native::memory::aligned_vector<outscalar_t, ILP>; in WriteBpropResultsVectorized()
716 using LoadT = at::native::memory::aligned_vector<scalar_t, ILP>; in cunn_SoftMaxForwardSmem()
754 using StoreT = at::native::memory::aligned_vector<outscalar_t, ILP>; in cunn_SoftMaxForwardSmem()
777 using LoadT = at::native::memory::aligned_vector<scalar_t, ILP>; in cunn_SoftMaxBackward()
778 using StoreT = at::native::memory::aligned_vector<outscalar_t, ILP>; in cunn_SoftMaxBackward()
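
The SoftMax.cu hits all use the same ILP idiom: type aliases over aligned_vector for the input and output element types, then one vector load and one vector store per ILP elements. Below is a stripped-down sketch of a WriteFpropResultsVectorized-style loop, assuming the row length is a multiple of ILP; the epilogue functor and the shift/remainder handling of the real kernels are omitted.

template <typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { scalar_t val[vec_size]; };

// Each thread handles ILP elements per trip: one aligned load, a per-element
// epilogue, one aligned store.
template <int ILP, typename scalar_t, typename outscalar_t, typename Epilogue>
__device__ void write_results_vectorized(int size, const scalar_t* input,
                                         outscalar_t* output, Epilogue epilogue) {
  using LoadT  = aligned_vector<scalar_t, ILP>;
  using StoreT = aligned_vector<outscalar_t, ILP>;
  const int last = size / ILP;  // assumes size % ILP == 0 for this sketch
  for (int offset = threadIdx.x; offset < last; offset += blockDim.x) {
    LoadT in_vec = reinterpret_cast<const LoadT*>(input)[offset];
    StoreT out_vec;
    #pragma unroll
    for (int j = 0; j < ILP; ++j) {
      out_vec.val[j] = epilogue(in_vec.val[j]);
    }
    reinterpret_cast<StoreT*>(output)[offset] = out_vec;
  }
}
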
Dropout.cu
56 using LoadT = memory::aligned_vector<scalar_t, VEC>; in fused_dropout_kernel_vec()
57 using MaskLoadT = memory::aligned_vector<mask_t, VEC>; in fused_dropout_kernel_vec()
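
Dropout.cu applies the same idea to two streams at once: the values and the mask travel through separate aligned_vector types so both move VEC elements per transaction. A reduced sketch of that inner step; in the real fused_dropout_kernel_vec() the mask is generated from curand and written out rather than read, and the 1/(1-p) scale and grid-stride loop are handled around this.

#include <cstdint>

template <typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { scalar_t val[vec_size]; };

template <typename scalar_t, typename mask_t, int VEC>
__device__ void apply_mask_vec(const scalar_t* src, scalar_t* dst,
                               const mask_t* mask, scalar_t scale,
                               uint32_t vec_index) {
  using LoadT     = aligned_vector<scalar_t, VEC>;
  using MaskLoadT = aligned_vector<mask_t, VEC>;
  LoadT     value = reinterpret_cast<const LoadT*>(src)[vec_index];
  MaskLoadT keep  = reinterpret_cast<const MaskLoadT*>(mask)[vec_index];
  LoadT out;
  #pragma unroll
  for (int j = 0; j < VEC; ++j) {
    // Keep or zero each element, then rescale the kept ones.
    out.val[j] = value.val[j] * static_cast<scalar_t>(keep.val[j]) * scale;
  }
  reinterpret_cast<LoadT*>(dst)[vec_index] = out;
}
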
MultiTensorApply.cuh
37 using LT = at::native::memory::aligned_vector<T, kILP>; in load_store()
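
The single MultiTensorApply.cuh hit is the most compact form of the trick: load_store() is effectively a typed copy of one kILP-wide vector, so a foreach-style kernel moves kILP elements per call. A sketch, with kILP fixed to 4 as an assumption:

#include <cstdint>

template <typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { scalar_t val[vec_size]; };

constexpr int kILP = 4;

// Copy kILP contiguous elements with a single vector assignment; both
// pointers are assumed suitably aligned.
template <typename T>
__device__ __forceinline__ void load_store(T* dst, const T* src,
                                           int64_t dst_offset, int64_t src_offset) {
  using LT = aligned_vector<T, kILP>;
  reinterpret_cast<LT*>(dst)[dst_offset] =
      reinterpret_cast<const LT*>(src)[src_offset];
}
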
Reduce.cuh
507 … constexpr int align_bytes = alignof(at::native::memory::aligned_vector<scalar_t, input_vec_size>); in input_vectorized_thread_reduce_impl()
522 using load_t = at::native::memory::aligned_vector<scalar_t, input_vec_size>; in input_vectorized_thread_reduce_impl()
570 using load_t = at::native::memory::aligned_vector<scalar_t, output_vec_size>; in thread_reduce_impl()
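
Reduce.cuh adds the alignment bookkeeping: the reduction first checks how far the input pointer is from aligned_vector alignment (align_bytes at 507), reduces the misaligned head element-by-element, then streams the aligned middle with vectorized loads. A simplified, single-thread sum-reduction sketch of that split follows; the real input_vectorized_thread_reduce_impl() interleaves this with per-thread striding and an arbitrary reduction op.

#include <cstdint>

template <typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { scalar_t val[vec_size]; };

template <typename scalar_t, int input_vec_size>
__device__ scalar_t thread_sum_vectorized(const scalar_t* data, int64_t n) {
  using load_t = aligned_vector<scalar_t, input_vec_size>;
  constexpr int align_bytes = alignof(load_t);

  scalar_t acc = scalar_t(0);
  // Scalar prologue: consume elements until the pointer is align_bytes-aligned.
  int64_t i = 0;
  while (i < n && (reinterpret_cast<uintptr_t>(data + i) % align_bytes) != 0) {
    acc += data[i++];
  }
  // Vectorized body: one aligned load per input_vec_size elements.
  const int64_t n_vec = (n - i) / input_vec_size;
  const load_t* vec_ptr = reinterpret_cast<const load_t*>(data + i);
  for (int64_t v = 0; v < n_vec; ++v) {
    load_t chunk = vec_ptr[v];
    #pragma unroll
    for (int j = 0; j < input_vec_size; ++j) acc += chunk.val[j];
  }
  // Scalar epilogue for the leftover tail.
  for (int64_t k = i + n_vec * input_vec_size; k < n; ++k) acc += data[k];
  return acc;
}
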
layer_norm_kernel.cu
41 struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { struct
162 using vec_t = aligned_vector<T, vec_size>; in compute_stats()
236 using vec_t = aligned_vector<T, vec_size>; in vectorized_layer_norm_kernel_impl()
418 using vec_t = aligned_vector<T, vec_size>; in layer_norm_grad_input_kernel_vectorized()
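
layer_norm_kernel.cu carries its own local copy of the same struct (line 41) and uses it to feed the statistics and gradient kernels: each thread pulls vec_size elements per load while accumulating. A rough sketch of that load pattern for the statistics pass, assuming the row length is a multiple of vec_size; the real compute_stats() uses Welford-style updates and a block reduction, which are omitted here.

template <typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { scalar_t val[vec_size]; };

// Accumulate per-thread partial sums for mean/variance from vectorized loads.
template <typename T, int vec_size>
__device__ void accumulate_stats(const T* row, int n, float& sum, float& sum_sq) {
  using vec_t = aligned_vector<T, vec_size>;
  const vec_t* v = reinterpret_cast<const vec_t*>(row);
  const int n_vec = n / vec_size;  // assumes n % vec_size == 0
  for (int i = threadIdx.x; i < n_vec; i += blockDim.x) {
    vec_t chunk = v[i];
    #pragma unroll
    for (int j = 0; j < vec_size; ++j) {
      float x = static_cast<float>(chunk.val[j]);
      sum    += x;
      sum_sq += x * x;
    }
  }
}
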
Shape.cu
265 using LT = at::native::memory::aligned_vector<T, kILP>; in CatArrayBatchedCopy_aligned16_contig()
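
Shape.cu's aligned16/contiguous cat path copies whole aligned_vector<T, kILP> chunks in a grid-stride loop instead of single elements, which is where the 16-byte alignment requirement in the kernel name comes from. A stripped-down sketch of such a copy; the batched tensor bookkeeping of CatArrayBatchedCopy_aligned16_contig() is not shown.

#include <cstdint>

template <typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { scalar_t val[vec_size]; };

// Grid-stride copy, kILP elements per iteration; n is assumed to be a
// multiple of kILP and both pointers 16-byte aligned.
template <typename T, int kILP>
__global__ void vector_copy_kernel(T* dst, const T* src, int64_t n) {
  using LT = aligned_vector<T, kILP>;
  const int64_t n_vec = n / kILP;
  int64_t i = blockIdx.x * static_cast<int64_t>(blockDim.x) + threadIdx.x;
  for (; i < n_vec; i += static_cast<int64_t>(gridDim.x) * blockDim.x) {
    reinterpret_cast<LT*>(dst)[i] = reinterpret_cast<const LT*>(src)[i];
  }
}
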
/external/pytorch/aten/src/ATen/native/transformers/cuda/
attention.cu
119 using LoadT = memory::aligned_vector<scalar_t, VEC>; in transform_bias_rescale_qkv_kernel()
226 using LoadT = memory::aligned_vector<scalar_t, VEC>; in transform_bias_rescale_qkv_add_padding_kernel()
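
In the transformer kernels the same vectorized loads feed QKV preprocessing: the packed qkv tensor and its bias are each read VEC elements at a time, the bias is added, and (for the query part) the result is rescaled. A much-reduced sketch of that inner step; the layout math and the q/k/v splitting of transform_bias_rescale_qkv_kernel() are omitted, and the is_q and inv_sqrt_dim_per_head parameters are illustrative names, not the real signature.

#include <cstdint>

template <typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { scalar_t val[vec_size]; };

template <typename scalar_t, int VEC>
__device__ void add_bias_rescale_vec(const scalar_t* qkv, const scalar_t* bias,
                                     scalar_t* out, float inv_sqrt_dim_per_head,
                                     uint32_t vec_index, uint32_t bias_vec_index,
                                     bool is_q) {
  using LoadT = aligned_vector<scalar_t, VEC>;
  LoadT v = reinterpret_cast<const LoadT*>(qkv)[vec_index];
  LoadT b = reinterpret_cast<const LoadT*>(bias)[bias_vec_index];
  LoadT r;
  #pragma unroll
  for (int j = 0; j < VEC; ++j) {
    float x = static_cast<float>(v.val[j]) + static_cast<float>(b.val[j]);
    if (is_q) x *= inv_sqrt_dim_per_head;  // only queries get the 1/sqrt(d) scale
    r.val[j] = static_cast<scalar_t>(x);
  }
  reinterpret_cast<LoadT*>(out)[vec_index] = r;
}
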