Home
last modified time | relevance | path

Searched refs:vk8x01234567 (Results 1 – 25 of 285) sorted by relevance

12345678910>>...12

/external/XNNPACK/src/f32-dwconv/gen/
Dup16x9-minmax-fma3-acc2.c165 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
167 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
239 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
240 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
293 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
294 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
Dup16x9-minmax-fma3.c165 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local
167 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
236 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local
237 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
288 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local
289 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
Dup16x9-minmax-avx.c165 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local
167 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
236 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local
237 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
288 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local
289 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
Dup16x9-minmax-avx-acc2.c165 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local
167 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2()
239 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local
240 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2()
293 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local
294 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2()
Dup8x9-minmax-fma3.c139 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3() local
140 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
191 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3() local
192 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
Dup8x9-minmax-avx.c139 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx() local
140 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
191 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx() local
192 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
Dup8x9-minmax-fma3-acc2.c139 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
140 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
193 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
194 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
Dup8x9-minmax-avx-acc2.c139 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local
140 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
193 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local
194 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
/external/XNNPACK/src/f16-dwconv/gen/
Dup16x9-minmax-neonfp16arith-acc2.c149 const float16x8_t vk8x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
151 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
203 const float16x8_t vk8x01234567 = vld1q_f16(w + 136); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
204 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
251 const float16x8_t vk8x01234567 = vld1q_f16(w + 144); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
252 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
Dup16x9-minmax-neonfp16arith.c149 const float16x8_t vk8x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local
151 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
200 const float16x8_t vk8x01234567 = vld1q_f16(w + 136); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local
201 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
246 const float16x8_t vk8x01234567 = vld1q_f16(w + 144); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local
247 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
Dup8x9-minmax-neonfp16arith.c123 const float16x8_t vk8x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() local
124 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
169 const float16x8_t vk8x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() local
170 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
Dup8x9-minmax-neonfp16arith-acc2.c123 const float16x8_t vk8x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local
124 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
171 const float16x8_t vk8x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local
172 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
Dup16x9-minmax-fma3.c168 const __m256 vk8x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (w + 144))); in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3() local
170 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3()
239 const __m256 vk8x01234567 = _mm256_cvtph_ps(_mm_load_si128((const __m128i*) (w + 144))); in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3() local
240 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3()
299 const __m256 vk8x01234567 = _mm256_cvtph_ps(_mm_load_si128((const __m128i*) (w + 144))); in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3() local
300 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x9__fma3()
Dup8x9-minmax-fma3-acc2.c142 const __m256 vk8x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (w + 72))); in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
143 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2()
204 const __m256 vk8x01234567 = _mm256_cvtph_ps(_mm_load_si128((const __m128i*) (w + 72))); in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
205 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2()
Dup8x9-minmax-fma3.c142 const __m256 vk8x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (w + 72))); in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3() local
143 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3()
202 const __m256 vk8x01234567 = _mm256_cvtph_ps(_mm_load_si128((const __m128i*) (w + 72))); in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3() local
203 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up8x9__fma3()
Dup32x9-minmax-neonfp16arith-acc2.c201 const float16x8_t vk8x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2() local
205 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2()
267 const float16x8_t vk8x01234567 = vld1q_f16(w + 280); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2() local
268 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2()
315 const float16x8_t vk8x01234567 = vld1q_f16(w + 288); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2() local
316 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup8x9-minmax-fp32-neonv8-mul16.c141 … const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
143 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
144 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
221 const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 64))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
223 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
224 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
Dup8x9-minmax-rndnu-neon-mul16.c142 … const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
144 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
145 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
222 const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 64))); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
224 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
Dup8x9-minmax-fp32-neon-mul16.c141 … const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
143 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
144 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
222 const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 64))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
224 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
/external/XNNPACK/src/qc8-dwconv/gen/
Dup8x9-minmax-fp32-neonv8-mul16.c140 … const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
142 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
143 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
223 const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 64))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
225 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
226 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
Dup8x9-minmax-fp32-neon-mul16.c140 … const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((const int8_t*) w + 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
142 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
143 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
224 const int16x8_t vk8x01234567 = vmovl_s8(vld1_s8((const void*) ((const int8_t*) w + 64))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
226 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
227 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
Dup8x9-minmax-fp32-sse2-mul16-add16.c181 …const __m128i vk8x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t)… in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local
185 … const __m128i vxk8x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk8x01234567, vk8x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
314 …const __m128i vk8x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t)… in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local
317 … const __m128i vxk8x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk8x01234567, vk8x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
/external/XNNPACK/src/qu8-dwconv/gen/
Dup8x9-minmax-fp32-neonv8-mul16.c142 …const int16x8_t vk8x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8(w), vkernel_zero_point)); w … in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
144 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
145 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
222 …const int16x8_t vk8x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8((const void*) ((const uint8_… in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16() local
224 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16()
Dup8x9-minmax-rndnu-neon-mul16.c143 …const int16x8_t vk8x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8(w), vkernel_zero_point)); w … in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
145 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
146 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
223 …const int16x8_t vk8x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8((const void*) ((const uint8_… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
225 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
226 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
Dup8x9-minmax-fp32-neon-mul16.c142 …const int16x8_t vk8x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8(w), vkernel_zero_point)); w … in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
144 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
145 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
223 …const int16x8_t vk8x01234567 = vreinterpretq_s16_u16(vsubl_u8(vld1_u8((const void*) ((const uint8_… in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16() local
225 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi8x01234567), vget_low_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()
226 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16()

12345678910>>...12