Home
last modified time | relevance | path

Searched refs:vk4x01234567 (Results 1 – 16 of 16) sorted by relevance

/external/XNNPACK/src/f32-dwconv/gen/
Dup16x9-fma3-acc2.c102 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() local
104 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
188 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() local
189 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
250 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() local
251 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
Dup16x9-fma3.c102 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__fma3() local
104 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3()
185 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__fma3() local
186 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3()
245 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__fma3() local
246 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x9__fma3()
Dup16x9-avx-acc2.c102 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() local
104 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
188 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() local
189 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
250 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() local
251 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
Dup16x9-avx.c102 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__avx() local
104 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx()
185 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__avx() local
186 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx()
245 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x9__avx() local
246 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx()
Dup8x9-fma3-acc2.c88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() local
89 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
150 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() local
151 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
Dup8x9-fma3.c88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__fma3() local
89 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x9__fma3()
148 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__fma3() local
149 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x9__fma3()
Dup8x9-avx.c88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__avx() local
89 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx()
148 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__avx() local
149 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx()
Dup8x9-avx-acc2.c88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() local
89 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
150 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() local
151 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
Dup8x25-fma3-acc2.c120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() local
121 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
278 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() local
279 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
Dup8x25-fma3.c120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__fma3() local
121 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x25__fma3()
276 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__fma3() local
277 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up8x25__fma3()
Dup16x25-fma3.c134 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__fma3() local
136 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3()
361 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__fma3() local
362 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3()
517 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__fma3() local
518 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3()
Dup16x25-fma3-acc2.c134 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() local
136 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
364 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() local
365 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
522 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() local
523 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
Dup8x25-avx.c120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__avx() local
121 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx()
276 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__avx() local
277 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx()
Dup8x25-avx-acc2.c120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() local
121 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
278 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() local
279 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
Dup16x25-avx-acc2.c134 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() local
136 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
364 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() local
365 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
522 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() local
523 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
Dup16x25-avx.c134 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__avx() local
136 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx()
361 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__avx() local
362 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx()
517 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_ukernel_up16x25__avx() local
518 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx()