Searched refs:aligned_vector (Results 1 – 8 of 8) sorted by relevance
/external/pytorch/aten/src/ATen/native/cuda/ |
D | MemoryAccess.cuh | 160 struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { struct 165 __device__ aligned_vector<scalar_t, vec_size> load_vector(const scalar_t *base_ptr, uint32_t offset… in load_vector() 166 using vec_t = aligned_vector<scalar_t, vec_size>; in load_vector() 172 __device__ aligned_vector<bool, vec_size> load_vector(const bool *base_ptr, uint32_t offset) { in load_vector() 175 aligned_vector<bool, vec_size> ret; in load_vector() 276 using vec_t = aligned_vector<scalar_t, vec_size>; in store() 351 constexpr int vec2_alignment = std::alignment_of<aligned_vector<scalar_t, 2>>::value; in can_vectorize_up_to() 352 constexpr int vec4_alignment = std::alignment_of<aligned_vector<scalar_t, 4>>::value; in can_vectorize_up_to()
|
D | SoftMax.cu | 460 using LoadT = at::native::memory::aligned_vector<T, ILP>; in ilpReduce() 507 using LoadT = at::native::memory::aligned_vector<scalar_t, ILP>; in WriteFpropResultsVectorized() 508 using StoreT = at::native::memory::aligned_vector<outscalar_t, ILP>; in WriteFpropResultsVectorized() 561 using gradInputT = at::native::memory::aligned_vector<scalar_t, ILP>; in WriteBpropResultsVectorized() 562 using outputT = at::native::memory::aligned_vector<outscalar_t, ILP>; in WriteBpropResultsVectorized() 716 using LoadT = at::native::memory::aligned_vector<scalar_t, ILP>; in cunn_SoftMaxForwardSmem() 754 using StoreT = at::native::memory::aligned_vector<outscalar_t, ILP>; in cunn_SoftMaxForwardSmem() 777 using LoadT = at::native::memory::aligned_vector<scalar_t, ILP>; in cunn_SoftMaxBackward() 778 using StoreT = at::native::memory::aligned_vector<outscalar_t, ILP>; in cunn_SoftMaxBackward()
|
D | Dropout.cu | 56 using LoadT = memory::aligned_vector<scalar_t, VEC>; in fused_dropout_kernel_vec() 57 using MaskLoadT = memory::aligned_vector<mask_t, VEC>; in fused_dropout_kernel_vec()
|
D | MultiTensorApply.cuh | 37 using LT = at::native::memory::aligned_vector<T, kILP>; in load_store()
|
D | Reduce.cuh | 507 … constexpr int align_bytes = alignof(at::native::memory::aligned_vector<scalar_t, input_vec_size>); in input_vectorized_thread_reduce_impl() 522 using load_t = at::native::memory::aligned_vector<scalar_t, input_vec_size>; in input_vectorized_thread_reduce_impl() 570 using load_t = at::native::memory::aligned_vector<scalar_t, output_vec_size>; in thread_reduce_impl()
|
D | layer_norm_kernel.cu | 41 struct alignas(sizeof(scalar_t) * vec_size) aligned_vector { struct 162 using vec_t = aligned_vector<T, vec_size>; in compute_stats() 236 using vec_t = aligned_vector<T, vec_size>; in vectorized_layer_norm_kernel_impl() 418 using vec_t = aligned_vector<T, vec_size>; in layer_norm_grad_input_kernel_vectorized()
|
D | Shape.cu | 265 using LT = at::native::memory::aligned_vector<T, kILP>; in CatArrayBatchedCopy_aligned16_contig()
|
/external/pytorch/aten/src/ATen/native/transformers/cuda/ |
D | attention.cu | 119 using LoadT = memory::aligned_vector<scalar_t, VEC>; in transform_bias_rescale_qkv_kernel() 226 using LoadT = memory::aligned_vector<scalar_t, VEC>; in transform_bias_rescale_qkv_add_padding_kernel()
|