Home
last modified time | relevance | path

Searched refs:vi4x01234567 (Results 1 – 16 of 16) sorted by relevance

/external/XNNPACK/src/f32-dwconv/gen/
Dup16x9-fma3-acc2.c98 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() local
104 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
185 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() local
189 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
249 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() local
251 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
Dup16x9-fma3.c98 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x9__fma3() local
104 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3()
182 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x9__fma3() local
186 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3()
244 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up16x9__fma3() local
246 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3()
Dup16x9-avx-acc2.c98 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() local
104 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
185 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() local
189 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
249 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() local
251 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
Dup16x9-avx.c98 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x9__avx() local
104 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx()
182 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x9__avx() local
186 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx()
244 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up16x9__avx() local
246 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx()
Dup8x9-fma3-acc2.c85 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() local
89 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
149 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() local
151 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
Dup8x9-fma3.c85 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up8x9__fma3() local
89 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x9__fma3()
147 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up8x9__fma3() local
149 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x9__fma3()
Dup8x9-avx.c85 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up8x9__avx() local
89 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx()
147 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up8x9__avx() local
149 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx()
Dup8x9-avx-acc2.c85 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() local
89 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
149 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() local
151 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
Dup8x25-fma3-acc2.c117 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() local
121 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
277 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() local
279 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
Dup8x25-fma3.c117 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up8x25__fma3() local
121 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x25__fma3()
275 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up8x25__fma3() local
277 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x25__fma3()
Dup16x25-fma3.c130 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x25__fma3() local
136 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3()
358 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x25__fma3() local
362 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3()
516 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up16x25__fma3() local
518 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3()
Dup16x25-fma3-acc2.c130 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() local
136 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
361 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() local
365 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
521 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() local
523 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
Dup8x25-avx.c117 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up8x25__avx() local
121 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx()
275 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up8x25__avx() local
277 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx()
Dup8x25-avx-acc2.c117 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() local
121 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
277 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() local
279 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
Dup16x25-avx-acc2.c130 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() local
136 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
361 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() local
365 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
521 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() local
523 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
Dup16x25-avx.c130 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x25__avx() local
136 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx()
358 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_ukernel_up16x25__avx() local
362 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx()
516 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_ukernel_up16x25__avx() local
518 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx()