Home
last modified time | relevance | path

Searched refs:vk4x01234567 (Results 1 – 25 of 285) sorted by relevance

12345678910>>...12

/external/XNNPACK/src/f32-dwconv/gen/
Dup16x9-minmax-fma3-acc2.c129 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
131 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
215 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
216 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
277 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
278 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
Dup16x9-minmax-fma3.c129 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local
131 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
212 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local
213 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
272 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local
273 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
Dup16x9-minmax-avx.c129 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local
131 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
212 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local
213 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
272 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local
273 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
Dup16x9-minmax-avx-acc2.c129 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local
131 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2()
215 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local
216 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2()
277 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local
278 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2()
Dup8x9-minmax-fma3.c115 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3() local
116 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
175 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3() local
176 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
Dup8x9-minmax-avx.c115 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx() local
116 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
175 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx() local
176 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
Dup8x9-minmax-fma3-acc2.c115 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
116 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
177 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
178 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
Dup8x9-minmax-avx-acc2.c115 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local
116 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
177 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local
178 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
/external/XNNPACK/src/f16-dwconv/gen/
Dup16x9-minmax-neonfp16arith-acc2.c121 const float16x8_t vk4x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
123 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
187 const float16x8_t vk4x01234567 = vld1q_f16(w + 72); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
188 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
235 const float16x8_t vk4x01234567 = vld1q_f16(w + 80); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
236 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
Dup16x9-minmax-neonfp16arith.c121 const float16x8_t vk4x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local
123 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
184 const float16x8_t vk4x01234567 = vld1q_f16(w + 72); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local
185 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
230 const float16x8_t vk4x01234567 = vld1q_f16(w + 80); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local
231 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
Dup8x9-minmax-neonfp16arith.c107 const float16x8_t vk4x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() local
108 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
153 const float16x8_t vk4x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() local
154 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
Dup8x9-minmax-neonfp16arith-acc2.c107 const float16x8_t vk4x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local
108 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
155 const float16x8_t vk4x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local
156 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
Dup16x9-minmax-fma3.c132 const __m256 vk4x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (w + 80))); in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3() local
134 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3()
215 const __m256 vk4x01234567 = _mm256_cvtph_ps(_mm_load_si128((const __m128i*) (w + 80))); in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3() local
216 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3()
279 const __m256 vk4x01234567 = _mm256_cvtph_ps(_mm_load_si128((const __m128i*) (w + 80))); in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3() local
280 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3()
Dup8x9-minmax-fma3-acc2.c118 const __m256 vk4x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (w + 40))); in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
119 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2()
184 const __m256 vk4x01234567 = _mm256_cvtph_ps(_mm_load_si128((const __m128i*) (w + 40))); in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
185 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2()
Dup8x9-minmax-fma3.c118 const __m256 vk4x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (w + 40))); in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3() local
119 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3()
182 const __m256 vk4x01234567 = _mm256_cvtph_ps(_mm_load_si128((const __m128i*) (w + 40))); in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3() local
183 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3()
Dup32x9-minmax-neonfp16arith-acc2.c149 const float16x8_t vk4x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2() local
153 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2()
251 const float16x8_t vk4x01234567 = vld1q_f16(w + 152); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2() local
252 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2()
299 const float16x8_t vk4x01234567 = vld1q_f16(w + 160); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2() local
300 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup8x9-minmax-fp32-neonv8-mul16.c117 … const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
119 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
120 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
201 const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 32))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
203 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
204 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
Dup8x9-minmax-rndnu-neon-mul16.c118 … const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
120 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
202 const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 32))); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
204 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
205 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
Dup8x9-minmax-fp32-neon-mul16.c117 … const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
119 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
120 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
202 const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 32))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
204 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
205 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
/external/XNNPACK/src/qc8-dwconv/gen/
Dup8x9-minmax-fp32-neonv8-mul16.c116 … const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
118 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
119 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
203 const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 32))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
205 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
206 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
Dup8x9-minmax-fp32-neon-mul16.c116 … const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
118 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
119 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
204 const int16x8_t vk4x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 32))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
206 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
207 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
Dup8x9-minmax-fp32-sse2-mul16-add16.c135 …const __m128i vk4x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t)… in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local
139 … const __m128i vxk4x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk4x01234567, vk4x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
272 …const __m128i vk4x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t)… in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local
275 … const __m128i vxk4x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk4x01234567, vk4x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
/external/XNNPACK/src/qu8-dwconv/gen/
Dup8x9-minmax-fp32-neonv8-mul16.c118 …const int16x8_t vk4x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8(w), vkernel_zero_point)); w … in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
120 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
202 …const int16x8_t vk4x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8((const void*) ((const uint8_… in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
204 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
205 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
Dup8x9-minmax-rndnu-neon-mul16.c119 …const int16x8_t vk4x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8(w), vkernel_zero_point)); w … in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
121 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
122 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
203 …const int16x8_t vk4x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8((const void*) ((const uint8_… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
205 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
206 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
Dup8x9-minmax-fp32-neon-mul16.c118 …const int16x8_t vk4x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8(w), vkernel_zero_point)); w … in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
120 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
203 …const int16x8_t vk4x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8((const void*) ((const uint8_… in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
205 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
206 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()

12345678910>>...12