/external/executorch/kernels/optimized/vec/vec256/
D | vec256_int.h |
   67  __at_align__ int64_t tmp_values[size()];                 in blend() local
   68  a.store(tmp_values);                                     in blend()
   70  tmp_values[0] = _mm256_extract_epi64(b.values, 0);       in blend()
   72  tmp_values[1] = _mm256_extract_epi64(b.values, 1);       in blend()
   74  tmp_values[2] = _mm256_extract_epi64(b.values, 2);       in blend()
   76  tmp_values[3] = _mm256_extract_epi64(b.values, 3);       in blend()
   77  return loadu(tmp_values);                                in blend()
  105  __at_align__ int64_t tmp_values[size()];                 in loadu() local
  110  tmp_values[i] = 0;                                       in loadu()
  112  std::memcpy(tmp_values, ptr, count * sizeof(int64_t));   in loadu()
  [all …]
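The blend() matches above all follow the same mask-driven staging idea: spill `a` to an aligned scratch array, overwrite the lanes selected by a compile-time mask with lanes extracted from `b`, and reload the result. A minimal standalone sketch of that pattern, assuming AVX2, plain `alignas` in place of the `__at_align__` macro, and a hypothetical free function instead of the real `Vectorized<int64_t>::blend` member:

    #include <immintrin.h>
    #include <cstdint>

    // Hypothetical standalone version of the blend() pattern seen above:
    // spill `a`, patch the lanes picked out by `mask` with lanes of `b`,
    // then reload the whole register.
    template <int64_t mask>
    __m256i blend_epi64_sketch(__m256i a, __m256i b) {
      alignas(32) int64_t tmp_values[4];
      _mm256_store_si256(reinterpret_cast<__m256i*>(tmp_values), a);
      if (mask & 0x01) tmp_values[0] = _mm256_extract_epi64(b, 0);
      if (mask & 0x02) tmp_values[1] = _mm256_extract_epi64(b, 1);
      if (mask & 0x04) tmp_values[2] = _mm256_extract_epi64(b, 2);
      if (mask & 0x08) tmp_values[3] = _mm256_extract_epi64(b, 3);
      return _mm256_load_si256(reinterpret_cast<const __m256i*>(tmp_values));
    }

Because `mask` is a template parameter, the per-lane branches fold away at compile time, leaving only the extracts that the mask actually selects.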
D | vec256_double.h |
   80  __at_align__ double tmp_values[size()];  variable
   85  tmp_values[i] = 0.0;
   88  tmp_values,
   91  return _mm256_load_pd(tmp_values);
   97  double tmp_values[size()];  variable
   98  _mm256_storeu_pd(reinterpret_cast<double*>(tmp_values), values);
   99  std::memcpy(ptr, tmp_values, count * sizeof(double));
D | vec256_float.h |
   88  __at_align__ float tmp_values[size()];  variable
   93  tmp_values[i] = 0.0;
   96  tmp_values, reinterpret_cast<const float*>(ptr), count * sizeof(float));
   97  return _mm256_loadu_ps(tmp_values);
  103  float tmp_values[size()];  variable
  104  _mm256_storeu_ps(reinterpret_cast<float*>(tmp_values), values);
  105  std::memcpy(ptr, tmp_values, count * sizeof(float));
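The loadu(ptr, count) matches in the float and double headers stage short tails through a zeroed scratch buffer so the vector load never reads past the end of `ptr`. A minimal sketch of the float case under the same assumptions (hypothetical standalone name, `alignas` for `__at_align__`):

    #include <immintrin.h>
    #include <cstring>

    // Hypothetical standalone version of the partial-load pattern: zero the
    // lanes that memcpy will not fill, copy only `count` valid elements from
    // the source, then load the full register from the scratch buffer.
    __m256 loadu_partial_sketch(const float* ptr, int count) {
      alignas(32) float tmp_values[8];
      for (int i = 0; i < 8; ++i) {
        tmp_values[i] = 0.0f;
      }
      std::memcpy(tmp_values, ptr, count * sizeof(float));
      return _mm256_loadu_ps(tmp_values);
    }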
/external/pytorch/aten/src/ATen/cpu/vec/vec256/
D | vec256_int.h |
   58  __at_align__ int64_t tmp_values[size()];                 in blend() local
   59  a.store(tmp_values);                                     in blend()
   61  tmp_values[0] = _mm256_extract_epi64(b.values, 0);       in blend()
   63  tmp_values[1] = _mm256_extract_epi64(b.values, 1);       in blend()
   65  tmp_values[2] = _mm256_extract_epi64(b.values, 2);       in blend()
   67  tmp_values[3] = _mm256_extract_epi64(b.values, 3);       in blend()
   68  return loadu(tmp_values);                                in blend()
   96  __at_align__ int64_t tmp_values[size()];                 in loadu() local
  101  tmp_values[i] = 0;                                       in loadu()
  103  std::memcpy(tmp_values, ptr, count * sizeof(int64_t));   in loadu()
  [all …]
D | vec256_qint.h |
  302  __at_align__ value_type tmp_values[size()];
  307  tmp_values[i] = 0;
  310  tmp_values, reinterpret_cast<const value_type*>(ptr), count * sizeof(value_type));
  311  return _mm256_loadu_si256((const __m256i*)tmp_values);
  501  __at_align__ value_type tmp_values[size()];
  506  tmp_values[i] = 0;
  509  tmp_values, reinterpret_cast<const value_type*>(ptr), count * sizeof(value_type));
  510  return _mm256_loadu_si256((const __m256i*)tmp_values);
  699  __at_align__ value_type tmp_values[size()];
  704  tmp_values[i] = 0;
  [all …]
D | vec256_bfloat16.h |
  206  __at_align__ int16_t tmp_values[size()];  variable
  207  std::memcpy(tmp_values, ptr, count * sizeof(int16_t));
  208  return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(tmp_values));
  214  __at_align__ int16_t tmp_values[size()];  variable
  215  _mm256_storeu_si256(reinterpret_cast<__m256i*>(tmp_values), values);
  216  std::memcpy(ptr, tmp_values, count * sizeof(int16_t));
  221  __at_align__ int16_t tmp_values[size()];                 in blend() local
  222  a.store(tmp_values);                                     in blend()
  224  tmp_values[0] = _mm256_extract_epi16(b.values, 0);       in blend()
  226  tmp_values[1] = _mm256_extract_epi16(b.values, 1);       in blend()
  [all …]
D | vec256_double.h |
   72  __at_align__ double tmp_values[size()];  variable
   77  tmp_values[i] = 0.0;
   80  tmp_values,
   83  return _mm256_load_pd(tmp_values);
   89  double tmp_values[size()];  variable
   90  _mm256_storeu_pd(reinterpret_cast<double*>(tmp_values), values);
   91  std::memcpy(ptr, tmp_values, count * sizeof(double));
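The store(ptr, count) matches are the mirror image: spill the whole register to scratch, then copy only `count` elements out, so nothing is written past the caller's buffer. A sketch of the double case, again with a hypothetical standalone name:

    #include <immintrin.h>
    #include <cstring>

    // Hypothetical standalone version of the partial-store pattern: spill
    // all four lanes to a scratch array, then copy only the `count` valid
    // elements to the destination.
    void storeu_partial_sketch(double* ptr, __m256d values, int count) {
      double tmp_values[4];
      _mm256_storeu_pd(tmp_values, values);
      std::memcpy(ptr, tmp_values, count * sizeof(double));
    }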
D | vec256_complex_double.h |
   87  __at_align__ double tmp_values[2*size()];  variable
   92  tmp_values[i] = 0.0;
   95  tmp_values,
   98  return _mm256_load_pd(tmp_values);
  104  double tmp_values[2*size()];  variable
  105  _mm256_storeu_pd(reinterpret_cast<double*>(tmp_values), values);
  106  std::memcpy(ptr, tmp_values, count * sizeof(c10::complex<double>));
D | vec256_complex_float.h |
  122  __at_align__ float tmp_values[2*size()];  variable
  127  tmp_values[i] = 0.0;
  130  tmp_values,
  133  return _mm256_load_ps(tmp_values);
  139  float tmp_values[2*size()];  variable
  140  _mm256_storeu_ps(reinterpret_cast<float*>(tmp_values), values);
  141  std::memcpy(ptr, tmp_values, count * sizeof(c10::complex<float>));
D | vec256_float.h |
   80  __at_align__ float tmp_values[size()];  variable
   85  tmp_values[i] = 0.0;
   88  tmp_values, reinterpret_cast<const float*>(ptr), count * sizeof(float));
   89  return _mm256_loadu_ps(tmp_values);
   95  float tmp_values[size()];  variable
   96  _mm256_storeu_ps(reinterpret_cast<float*>(tmp_values), values);
   97  std::memcpy(ptr, tmp_values, count * sizeof(float));
/external/pytorch/aten/src/ATen/
D | FunctionalStorageImpl.cpp |
   47  std::vector<at::Tensor> tmp_values({base});                      in apply_update() local
   48  tmp_values.reserve(update.view_metas.size());                    in apply_update()
   50  …at::Tensor next_view = update.view_metas[i].forward_fn(tmp_values.back(), update.view_metas[i].out…  in apply_update()
   55  tmp_values.push_back(std::move(next_view));                      in apply_update()
   60  t = update.view_metas[i].reverse_fn(tmp_values[i], t, out_idx);  in apply_update()
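Unlike the vectorization headers, the `tmp_values` here is a chain of materialized views. A rough sketch of the replay shape these matches suggest, using hypothetical stand-in types; the truncated match on line 50 elides the real `forward_fn` arguments, so the signatures and loop bounds below are illustrative only:

    #include <functional>
    #include <vector>

    // Hypothetical stand-ins for the real ATen types; only the shape of the
    // replay is taken from the matches above.
    struct Tensor {};
    struct ViewMeta {
      std::function<Tensor(const Tensor&)> forward_fn;                 // base -> view
      std::function<Tensor(const Tensor&, const Tensor&)> reverse_fn;  // (base, updated view) -> updated base
    };

    Tensor apply_update_sketch(Tensor base, Tensor t,
                               const std::vector<ViewMeta>& view_metas) {
      // Walk forward, materializing every intermediate view of `base`.
      std::vector<Tensor> tmp_values({base});
      tmp_values.reserve(view_metas.size());
      for (size_t i = 0; i + 1 < view_metas.size(); ++i) {
        tmp_values.push_back(view_metas[i].forward_fn(tmp_values.back()));
      }
      // Walk backward, folding the updated view `t` into each base in turn.
      for (size_t i = view_metas.size(); i-- > 0;) {
        t = view_metas[i].reverse_fn(tmp_values[i], t);
      }
      return t;
    }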
/external/pytorch/aten/src/ATen/cpu/vec/vec512/
D | vec512_bfloat16.h |
  224  __at_align__ int16_t tmp_values[size()];  in blend() local
  225  a.store(tmp_values);                      in blend()
  227  tmp_values[0] = b.values[31];             in blend()
  229  tmp_values[1] = b.values[30];             in blend()
  231  tmp_values[2] = b.values[29];             in blend()
  233  tmp_values[3] = b.values[28];             in blend()
  235  tmp_values[4] = b.values[27];             in blend()
  237  tmp_values[5] = b.values[26];             in blend()
  239  tmp_values[6] = b.values[25];             in blend()
  241  tmp_values[7] = b.values[24];             in blend()
  [all …]
D | vec512_qint.h |
  316  __at_align__ value_type tmp_values[size()];
  321  tmp_values[i] = 0;
  323  … std::memcpy(tmp_values, reinterpret_cast<const value_type*>(ptr), count * sizeof(value_type));
  324  return loadu(tmp_values);
  514  __at_align__ value_type tmp_values[size()];
  519  tmp_values[i] = 0;
  521  … std::memcpy(tmp_values, reinterpret_cast<const value_type*>(ptr), count * sizeof(value_type));
  522  return loadu(tmp_values);
  738  __at_align__ value_type tmp_values[size()];
  743  tmp_values[i] = 0;
  [all …]
D | vec512_complex_double.h |
  127  __at_align__ double tmp_values[2*size()];  variable
  132  tmp_values[i] = 0.0;
  135  tmp_values,
  138  return _mm512_load_pd(tmp_values);
  144  double tmp_values[2*size()];  variable
  145  _mm512_storeu_pd(reinterpret_cast<double*>(tmp_values), values);
  146  std::memcpy(ptr, tmp_values, count * sizeof(c10::complex<double>));
/external/pytorch/aten/src/ATen/cpu/vec/vec256/vsx/
D | vec256_int64_vsx.h |
  152  __at_align__ double tmp_values[size()] = {};  variable
  153  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
  156  (vint64)vec_vsx_ld(offset0, tmp_values),
  157  (vint64)vec_vsx_ld(offset16, tmp_values)};
  165  __at_align__ double tmp_values[size()];  variable
  166  vec_vsx_st((vfloat64)_vec0, offset0, tmp_values);
  167  vec_vsx_st((vfloat64)_vec1, offset16, tmp_values);
  169  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
D | vec256_qint32_vsx.h |
   84  __at_align__ value_type tmp_values[size()] = {};
   85  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
   87  return {vec_vsx_ld(offset0, tmp_values), vec_vsx_ld(offset16, tmp_values)};
   94  __at_align__ value_type tmp_values[size()];
   95  vec_vsx_st(_vec0, offset0, tmp_values);
   96  vec_vsx_st(_vec1, offset16, tmp_values);
   98  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
D | vec256_int32_vsx.h |
  202  __at_align__ value_type tmp_values[size()] = {};  variable
  203  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
  205  return {vec_vsx_ld(offset0, tmp_values), vec_vsx_ld(offset16, tmp_values)};
  212  __at_align__ value_type tmp_values[size()];  variable
  213  vec_vsx_st(_vec0, offset0, tmp_values);
  214  vec_vsx_st(_vec1, offset16, tmp_values);
  216  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
D | vec256_float_vsx.h |
  183  __at_align__ value_type tmp_values[size()] = {};  variable
  184  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
  186  return {vec_vsx_ld(offset0, tmp_values), vec_vsx_ld(offset16, tmp_values)};
  193  __at_align__ value_type tmp_values[size()];  variable
  194  vec_vsx_st(_vec0, offset0, tmp_values);
  195  vec_vsx_st(_vec1, offset16, tmp_values);
  197  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
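The VSX entries repeat the staging idea, but each `Vectorized<>` here wraps a pair of 16-byte registers, so the scratch buffer is read and written at byte offsets 0 and 16 (the `offset0`/`offset16` in the matches). A minimal sketch of the float partial load, assuming GCC/Clang AltiVec support and hypothetical standalone names:

    #include <altivec.h>
    #include <algorithm>
    #include <cstring>

    // Hypothetical standalone version of the VSX partial load: zero-initialize
    // a 32-byte scratch array, copy at most `size` valid elements in, then pull
    // the two halves into separate VSX registers at byte offsets 0 and 16.
    struct VecPair {
      __vector float v0;
      __vector float v1;
    };

    VecPair loadu_partial_vsx_sketch(const float* ptr, int count) {
      constexpr int size = 8;  // two 4-lane VSX registers
      alignas(16) float tmp_values[size] = {};
      std::memcpy(tmp_values, ptr, std::min(count, size) * sizeof(float));
      return {vec_vsx_ld(0, tmp_values), vec_vsx_ld(16, tmp_values)};
    }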
D | vec256_double_vsx.h |
  174  __at_align__ value_type tmp_values[size()] = {};  variable
  175  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
  177  return {vec_vsx_ld(offset0, tmp_values), vec_vsx_ld(offset16, tmp_values)};
  184  __at_align__ value_type tmp_values[size()];  variable
  185  vec_vsx_st(_vec0, offset0, tmp_values);
  186  vec_vsx_st(_vec1, offset16, tmp_values);
  188  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
D | vec256_int16_vsx.h |
  272  __at_align__ value_type tmp_values[size()] = {};  variable
  273  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
  275  return {vec_vsx_ld(offset0, tmp_values), vec_vsx_ld(offset16, tmp_values)};
  282  __at_align__ value_type tmp_values[size()];  variable
  283  vec_vsx_st(_vec0, offset0, tmp_values);
  284  vec_vsx_st(_vec1, offset16, tmp_values);
  285  std::memcpy(ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
D | vec256_complex_double_vsx.h |
  145  __at_align__ value_type tmp_values[size()] = {};  variable
  146  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
  149  vec_vsx_ld(offset0, reinterpret_cast<const double*>(tmp_values)),
  150  vec_vsx_ld(offset16, reinterpret_cast<const double*>(tmp_values))};
  157  __at_align__ value_type tmp_values[size()];  variable
  158  vec_vsx_st(_vec0, offset0, reinterpret_cast<double*>(tmp_values));
  159  vec_vsx_st(_vec1, offset16, reinterpret_cast<double*>(tmp_values));
  161  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
D | vec256_qint8_vsx.h |
   94  __at_align__ value_type tmp_values[size()] = {};
   95  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
   96  return {vec_vsx_ld(offset0, tmp_values), vec_vsx_ld(offset16, tmp_values)};
  103  __at_align__ value_type tmp_values[size()];
  104  vec_vsx_st(_vec0, offset0, tmp_values);
  105  vec_vsx_st(_vec1, offset16, tmp_values);
  107  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
D | vec256_quint8_vsx.h |
   97  __at_align__ value_type tmp_values[size()] = {};
   98  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
   99  return {vec_vsx_ld(offset0, tmp_values), vec_vsx_ld(offset16, tmp_values)};
  106  __at_align__ value_type tmp_values[size()];
  107  vec_vsx_st(_vec0, offset0, tmp_values);
  108  vec_vsx_st(_vec1, offset16, tmp_values);
  110  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
D | vec256_complex_float_vsx.h |
  206  __at_align__ value_type tmp_values[size()] = {};  variable
  207  std::memcpy(tmp_values, ptr, std::min(count, size()) * sizeof(value_type));
  210  vec_vsx_ld(offset0, reinterpret_cast<const float*>(tmp_values)),
  211  vec_vsx_ld(offset16, reinterpret_cast<const float*>(tmp_values))};
  219  __at_align__ value_type tmp_values[size()];  variable
  220  vec_vsx_st(_vec0, offset0, reinterpret_cast<float*>(tmp_values));
  221  vec_vsx_st(_vec1, offset16, reinterpret_cast<float*>(tmp_values));
  223  ptr, tmp_values, std::min(count, size()) * sizeof(value_type));
/external/pytorch/aten/src/ATen/native/
D | TopKImpl.h |
   39  TensorAccessor<const scalar_t, 1> tmp_values(  in topk_impl_loop()
   47  queue[j].first = tmp_values[j];                in topk_impl_loop()
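Here `tmp_values` is a 1-D accessor over the current row, feeding a queue of (value, index) pairs. A rough sketch of that shape, with a raw pointer standing in for the `TensorAccessor` and a hypothetical `std::partial_sort` for the selection step, which these two matches do not actually show:

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Hypothetical sketch: gather (value, original-index) pairs from the row,
    // then keep the k largest at the front. A raw pointer replaces the real
    // TensorAccessor<const scalar_t, 1>.
    template <typename scalar_t>
    void topk_row_sketch(const scalar_t* tmp_values, int64_t n, int64_t k,
                         std::vector<std::pair<scalar_t, int64_t>>& queue) {
      queue.resize(n);
      for (int64_t j = 0; j < n; ++j) {
        queue[j].first = tmp_values[j];  // mirrors the match on line 47
        queue[j].second = j;
      }
      std::partial_sort(
          queue.begin(), queue.begin() + k, queue.end(),
          [](const auto& a, const auto& b) { return a.first > b.first; });
    }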