/external/XNNPACK/src/f16-gavgpool/gen/ |
D | 7p7x-minmax-neonfp16arith-c32.c | 83 const float16x8_t vi5xOPQRSTUV = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 92 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi5xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 186 const float16x8_t vi5xOPQRSTUV = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 195 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi5xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 310 const float16x8_t vi5xOPQRSTUV = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 319 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi5xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
|
D | 7p7x-minmax-f16c-c32.c | 84 const __m256 vi5xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local 93 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi5xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 187 const __m256 vi5xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local 196 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi5xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 313 const __m256 vi5xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local 322 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi5xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
|
D | 7x-minmax-neonfp16arith-c32.c | 100 const float16x8_t vi5xOPQRSTUV = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() local 109 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi5xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
|
D | 7x-minmax-f16c-c32.c | 107 const __m256 vi5xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (i5 + 24))); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() local 118 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi5xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-rndnu-neon-c32.c | 84 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 93 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 196 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 205 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 340 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 349 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
|
D | 7p7x-minmax-fp32-neonv8-c32.c | 85 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 94 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() 197 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 206 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() 339 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 348 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32()
|
D | 7p7x-minmax-fp32-neon-c32.c | 84 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 93 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() 196 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 205 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() 339 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 348 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32()
|
D | 7x-minmax-fp32-neonv8-c32.c | 103 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() local 112 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32()
|
D | 7x-minmax-rndnu-neon-c32.c | 104 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7x__neon_c32() local 113 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7x__neon_c32()
|
D | 7x-minmax-fp32-neon-c32.c | 103 const int8x8_t vi5xOPQRSTUV = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neon_c32() local 112 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neon_c32()
|
/external/XNNPACK/src/qu8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-neonv8-c32.c | 85 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 94 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() 197 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 206 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() 339 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 348 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32()
|
D | 7p7x-minmax-fp32-neon-c32.c | 84 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 93 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() 196 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 205 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() 339 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 348 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32()
|
D | 7p7x-minmax-rndnu-neon-c32.c | 84 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 93 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 196 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 205 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 340 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 349 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
|
D | 7x-minmax-fp32-neon-c32.c | 103 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neon_c32() local 112 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neon_c32()
|
D | 7x-minmax-fp32-neonv8-c32.c | 103 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() local 112 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32()
|
D | 7x-minmax-rndnu-neon-c32.c | 104 const uint8x8_t vi5xOPQRSTUV = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7x__neon_c32() local 113 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7x__neon_c32()
|
/external/XNNPACK/src/f16-dwconv/gen/ |
D | up32x9-minmax-neonfp16arith-acc2.c | 161 const float16x8_t vi5xOPQRSTUV = vld1q_f16(i5); i5 += 8; in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2() local 169 vaccOPQRSTUVp1 = vfmaq_f16(vaccOPQRSTUVp1, vi5xOPQRSTUV, vk5xOPQRSTUV); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2()
|
D | up32x9-minmax-neonfp16arith.c | 161 const float16x8_t vi5xOPQRSTUV = vld1q_f16(i5); i5 += 8; in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith() local 169 vaccOPQRSTUVp0 = vfmaq_f16(vaccOPQRSTUVp0, vi5xOPQRSTUV, vk5xOPQRSTUV); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith()
|
/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up32x9-minmax-fp32-neon-mul16.c | 193 const int16x8_t vi5xOPQRSTUV = vmovl_s8(vld1_s8(i5)); i5 += 8; in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local 202 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 203 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
|
D | up32x9-minmax-fp32-neonv8-mul16.c | 193 const int16x8_t vi5xOPQRSTUV = vmovl_s8(vld1_s8(i5)); i5 += 8; in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local 202 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 203 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up32x9-minmax-fp32-neonv8-mul16.c | 194 const int16x8_t vi5xOPQRSTUV = vmovl_s8(vld1_s8(i5)); i5 += 8; in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local 203 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 204 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
|
D | up32x9-minmax-rndnu-neon-mul16.c | 195 const int16x8_t vi5xOPQRSTUV = vmovl_s8(vld1_s8(i5)); i5 += 8; in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() local 204 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
|
D | up32x9-minmax-fp32-neon-mul16.c | 194 const int16x8_t vi5xOPQRSTUV = vmovl_s8(vld1_s8(i5)); i5 += 8; in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local 203 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 204 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
|
/external/XNNPACK/src/qu8-dwconv/gen/ |
D | up32x9-minmax-fp32-neonv8-mul16.c | 195 const int16x8_t vi5xOPQRSTUV = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i5))); i5 += 8; in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local 204 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
|
D | up32x9-minmax-fp32-neon-mul16.c | 195 const int16x8_t vi5xOPQRSTUV = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(i5))); i5 += 8; in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local 204 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
|