/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x25-minmax-avx.c | 282 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx() local 286 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx() 410 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx() local 412 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
|
D | up8x25-minmax-fma3-acc2.c | 282 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() local 286 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 412 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() local 414 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
|
D | up8x25-minmax-fma3.c | 282 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3() local 286 vacc01234567p0 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3() 410 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3() local 412 vacc01234567p0 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
|
D | up16x25-minmax-avx.c | 340 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() local 346 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 523 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() local 527 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 651 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() local 653 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
|
D | up16x25-minmax-fma3-acc2.c | 340 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() local 346 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 526 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() local 530 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 656 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() local 658 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
|
D | up16x25-minmax-fma3.c | 340 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() local 346 vacc01234567p0 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 523 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() local 527 vacc01234567p0 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 651 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() local 653 vacc01234567p0 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
|
D | up8x25-minmax-avx-acc2.c | 282 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() local 286 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 412 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() local 414 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
|
D | up16x25-minmax-avx-acc2.c | 340 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() local 346 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 526 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() local 530 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 656 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() local 658 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x25-minmax-fp32-neon-mul16.c | 286 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); i19 += 8; in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 289 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 290 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 452 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 455 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 456 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
|
D | up8x25-minmax-rndnu-neon-mul16.c | 287 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); i19 += 8; in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() local 290 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 291 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 452 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() local 455 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 456 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
|
D | up8x25-minmax-fp32-neonv8-mul16.c | 286 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); i19 += 8; in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 289 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 290 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 451 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 454 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 455 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
|
D | up8x25-minmax-fp32-sse2-mul16-add16.c | 385 const __m128i vi19x01234567 = _mm_loadl_epi64((const __m128i*) i19); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16() local 389 … const __m128i vxi19x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi19x01234567, vi19x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16() 689 const __m128i vi19x01234567 = _mm_loadl_epi64((const __m128i*) i19); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16() local 692 … const __m128i vxi19x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi19x01234567, vi19x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16()
|
/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up8x25-minmax-fp32-neonv8-mul16.c | 285 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); i19 += 8; in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 288 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 289 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 453 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 456 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 457 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
|
D | up8x25-minmax-fp32-neon-mul16.c | 285 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); i19 += 8; in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 288 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 289 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 454 const int16x8_t vi19x01234567 = vmovl_s8(vld1_s8(i19)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 457 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 458 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
|
/external/XNNPACK/src/f16-dwconv/gen/ |
D | up8x25-minmax-neonfp16arith.c | 246 const float16x8_t vi19x01234567 = vld1q_f16(i19); i19 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() local 248 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() 356 const float16x8_t vi19x01234567 = vld1q_f16(i19); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() local 358 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
|
D | up8x25-minmax-neonfp16arith-acc2.c | 246 const float16x8_t vi19x01234567 = vld1q_f16(i19); i19 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() local 248 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 358 const float16x8_t vi19x01234567 = vld1q_f16(i19); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() local 360 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
|
D | up16x25-minmax-neonfp16arith-acc2.c | 304 const float16x8_t vi19x01234567 = vld1q_f16(i19); i19 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() local 308 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 438 const float16x8_t vi19x01234567 = vld1q_f16(i19); i19 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() local 440 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 550 const float16x8_t vi19x01234567 = vld1q_f16(i19); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() local 552 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
|
D | up16x25-minmax-neonfp16arith.c | 304 const float16x8_t vi19x01234567 = vld1q_f16(i19); i19 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() local 308 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 435 const float16x8_t vi19x01234567 = vld1q_f16(i19); i19 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() local 437 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 545 const float16x8_t vi19x01234567 = vld1q_f16(i19); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() local 547 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
|
D | up8x25-minmax-fma3-acc2.c | 285 const __m256 vi19x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i19)); in xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2() local 289 …vacc01234567p1 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi19x01234567, vk19x01234567, vac… in xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2() 433 const __m256 vi19x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i19)); in xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2() local 436 …vacc01234567p1 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi19x01234567, vk19x01234567, vac… in xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2()
|
D | up8x25-minmax-fma3.c | 285 const __m256 vi19x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i19)); in xnn_f16_dwconv_minmax_ukernel_up8x25__fma3() local 289 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi19x01234567, vk19x01234567, vac… in xnn_f16_dwconv_minmax_ukernel_up8x25__fma3() 431 const __m256 vi19x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i19)); in xnn_f16_dwconv_minmax_ukernel_up8x25__fma3() local 434 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi19x01234567, vk19x01234567, vac… in xnn_f16_dwconv_minmax_ukernel_up8x25__fma3()
|
/external/XNNPACK/src/qu8-dwconv/gen/ |
D | up8x25-minmax-fp32-neonv8-mul16.c | 287 const int16x8_t vi19x01234567 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i19))); i19 += 8; in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 290 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 291 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 452 const int16x8_t vi19x01234567 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i19))); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 455 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 456 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
|
D | up8x25-minmax-fp32-neon-mul16.c | 287 const int16x8_t vi19x01234567 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i19))); i19 += 8; in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 290 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 291 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 453 const int16x8_t vi19x01234567 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i19))); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 456 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 457 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
|
D | up8x25-minmax-rndnu-neon-mul8.c | 327 const uint8x8_t vi19x01234567 = vld1_u8(i19); i19 += 8; in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() local 330 vprod01234567 = vmull_u8(vi19x01234567, vk19x01234567); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 331 vsum01234567 = vaddw_u8(vsum01234567, vi19x01234567); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 504 const int16x8_t vi19x01234567 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i19))); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() local 507 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 508 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
|
D | up8x25-minmax-fp32-wasmsimd-mul16.c | 357 const v128_t vi19x01234567 = wasm_u16x8_load8x8(i19); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() local 359 vsumx01234567 = wasm_i16x8_add(vsumx01234567, vi19x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 362 vprod01234567 = wasm_i16x8_mul(vi19x01234567, vk19x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 627 const v128_t vi19x01234567 = wasm_u16x8_load8x8(i19); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() local 629 vsumx01234567 = wasm_i16x8_add(vsumx01234567, vi19x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 631 vprod01234567 = wasm_i16x8_mul(vi19x01234567, vk19x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
|
D | up8x25-minmax-rndnu-neon-mul16.c | 288 const int16x8_t vi19x01234567 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i19))); i19 += 8; in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() local 291 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 292 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 453 const int16x8_t vi19x01234567 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i19))); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() local 456 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi19x01234567), vget_low_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 457 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi19x01234567), vget_high_s16(vk19x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
|