/external/XNNPACK/src/f32-dwconv/gen/ |
D | up4x25-psimd-acc2.c | 101 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() local 113 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() 125 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() 137 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() 149 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi9x0123, vk9x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() 161 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi11x0123, vk11x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() 173 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi13x0123, vk13x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() 185 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi15x0123, vk15x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() 197 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi17x0123, vk17x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() 209 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi19x0123, vk19x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() [all …]
|
D | up4x25-sse-acc2.c | 101 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() local 113 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 125 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 137 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 149 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 161 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 173 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 185 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 197 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 209 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() [all …]
|
D | up8x25-psimd-acc2.c | 107 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local 125 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() 143 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() 161 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() 179 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi9x0123, vk9x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() 197 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi11x0123, vk11x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() 215 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi13x0123, vk13x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() 233 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi15x0123, vk15x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() 251 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi17x0123, vk17x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() 269 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi19x0123, vk19x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() [all …]
|
D | up8x25-sse-acc2.c | 107 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local 125 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 143 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 161 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 179 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 197 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 215 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 233 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 251 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 269 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() [all …]
|
D | up8x9-neon-acc2.c | 71 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local 85 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 99 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 113 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 124 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 145 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local 153 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 161 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 169 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 176 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() [all …]
|
D | up8x9-neonfma-acc2.c | 71 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local 85 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 99 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 113 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 124 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 145 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local 153 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 161 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 169 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 176 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() [all …]
|
D | up8x9-sse-acc2.c | 75 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local 93 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 111 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 129 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 144 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 169 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local 181 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 193 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 205 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 216 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() [all …]
|
D | up8x9-psimd-acc2.c | 75 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local 93 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 111 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 129 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 144 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 169 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local 181 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 193 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 205 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 216 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() [all …]
|
D | up4x9-psimd-acc2.c | 69 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local 81 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() 93 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() 105 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() 116 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() 133 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local 141 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() 149 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() 157 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() 164 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
|
D | up4x9-sse-acc2.c | 69 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() local 81 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 93 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 105 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 116 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 133 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() local 141 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 149 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 157 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 164 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
|
D | up4x9-neonfma-acc2.c | 65 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() local 73 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() 81 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() 89 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() 96 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() 113 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() local 121 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() 129 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() 137 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() 144 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2()
|
D | up4x9-neon-acc2.c | 65 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() local 73 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() 81 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() 89 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() 96 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() 113 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() local 121 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() 129 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() 137 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() 144 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2()
|
D | up8x4-psimd-acc2.c | 65 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() local 83 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() 89 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() 114 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() local 126 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() 131 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() 148 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() local 156 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() 159 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2()
|
D | up8x4-sse-acc2.c | 65 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() local 83 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 89 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 114 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() local 126 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 131 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 148 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() local 156 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 159 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
|
D | up4x4-psimd-acc2.c | 59 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2() local 71 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2() 76 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2() 93 psimd_f32 vacc0123p1 = psimd_mul_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2() local 101 vacc0123p1 = psimd_qfma_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2() 104 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2()
|
D | up4x4-sse-acc2.c | 59 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() local 71 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 76 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 93 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() local 101 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 104 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
|