/external/XNNPACK/src/f16-gavgpool/gen/ |
D | 7p7x-minmax-neonfp16arith-c32.c | 58 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 68 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi2xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 162 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 171 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi2xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 286 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 295 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi2xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
|
D | 7p7x-minmax-f16c-c32.c | 59 const __m256 vi2xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local 69 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi2xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 163 const __m256 vi2xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local 172 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi2xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 289 const __m256 vi2xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local 298 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi2xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
|
D | 7x-minmax-neonfp16arith-c32.c | 75 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() local 85 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi2xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
|
D | 7x-minmax-f16c-c32.c | 79 const __m256 vi2xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (i2 + 24))); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() local 91 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi2xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-rndnu-neon-c32.c | 59 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 69 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 171 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 181 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 315 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 325 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
|
D | 7p7x-minmax-fp32-neonv8-c32.c | 60 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 70 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() 172 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 182 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() 314 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 324 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32()
|
D | 7p7x-minmax-fp32-neon-c32.c | 59 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 69 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() 171 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 181 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() 314 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 324 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32()
|
D | 7x-minmax-fp32-neonv8-c32.c | 78 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() local 88 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32()
|
D | 7x-minmax-rndnu-neon-c32.c | 79 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7x__neon_c32() local 89 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7x__neon_c32()
|
D | 7x-minmax-fp32-neon-c32.c | 78 const int8x8_t vi2xOPQRSTUV = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neon_c32() local 88 vsumOPQRSTUV = vaddw_s8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neon_c32()
|
/external/XNNPACK/src/qu8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-neonv8-c32.c | 60 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 70 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() 172 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 182 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() 314 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32() local 324 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c32()
|
D | 7p7x-minmax-fp32-neon-c32.c | 59 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 69 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() 171 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 181 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() 314 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32() local 324 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c32()
|
D | 7p7x-minmax-rndnu-neon-c32.c | 59 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 69 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 171 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 181 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 315 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 325 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
|
D | 7x-minmax-fp32-neon-c32.c | 78 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neon_c32() local 88 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neon_c32()
|
D | 7x-minmax-fp32-neonv8-c32.c | 78 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() local 88 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32()
|
D | 7x-minmax-rndnu-neon-c32.c | 79 const uint8x8_t vi2xOPQRSTUV = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7x__neon_c32() local 89 vsumOPQRSTUV = vaddw_u8(vsumOPQRSTUV, vi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7x__neon_c32()
|
/external/XNNPACK/src/f16-dwconv/gen/ |
D | up32x3-minmax-neonfp16arith.c | 92 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith() local 100 vaccOPQRSTUVp0 = vfmaq_f16(vaccOPQRSTUVp0, vi2xOPQRSTUV, vk2xOPQRSTUV); in xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith()
|
D | up32x3-minmax-neonfp16arith-acc2.c | 92 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2() local 100 vaccOPQRSTUVp0 = vfmaq_f16(vaccOPQRSTUVp0, vi2xOPQRSTUV, vk2xOPQRSTUV); in xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2()
|
D | up32x4-minmax-neonfp16arith.c | 97 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith() local 105 vaccOPQRSTUVp0 = vfmaq_f16(vaccOPQRSTUVp0, vi2xOPQRSTUV, vk2xOPQRSTUV); in xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith()
|
D | up32x4-minmax-neonfp16arith-acc2.c | 97 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2() local 105 vaccOPQRSTUVp0 = vfmaq_f16(vaccOPQRSTUVp0, vi2xOPQRSTUV, vk2xOPQRSTUV); in xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2()
|
D | up32x3-minmax-fma3.c | 97 const __m256 vi2xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (i2 + 24))); in xnn_f16_dwconv_minmax_ukernel_up32x3__fma3() local 107 …vaccOPQRSTUVp0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi2xOPQRSTUV, vk2xOPQRSTUV, vaccO… in xnn_f16_dwconv_minmax_ukernel_up32x3__fma3()
|
D | up32x3-minmax-fma3-acc2.c | 97 const __m256 vi2xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (i2 + 24))); in xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2() local 107 …vaccOPQRSTUVp0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi2xOPQRSTUV, vk2xOPQRSTUV, vaccO… in xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2()
|
D | up32x4-minmax-fma3.c | 102 const __m256 vi2xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (i2 + 24))); in xnn_f16_dwconv_minmax_ukernel_up32x4__fma3() local 112 …vaccOPQRSTUVp0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi2xOPQRSTUV, vk2xOPQRSTUV, vaccO… in xnn_f16_dwconv_minmax_ukernel_up32x4__fma3()
|
D | up32x4-minmax-fma3-acc2.c | 102 const __m256 vi2xOPQRSTUV = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (i2 + 24))); in xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2() local 112 …vaccOPQRSTUVp0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi2xOPQRSTUV, vk2xOPQRSTUV, vaccO… in xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2()
|
D | up32x9-minmax-neonfp16arith-acc2.c | 122 const float16x8_t vi2xOPQRSTUV = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2() local 130 vaccOPQRSTUVp0 = vfmaq_f16(vaccOPQRSTUVp0, vi2xOPQRSTUV, vk2xOPQRSTUV); in xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2()
|