/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-sse2-c8.c | 69 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local 73 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() 130 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local 134 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() 211 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local 215 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() 278 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local 287 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
|
D | 7p7x-minmax-fp32-neon-c8.c | 54 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local 57 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() 89 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local 92 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() 147 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local 150 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() 200 const int8x8_t vi5x01234567 = vld1_s8(i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local 203 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
|
D | 7p7x-minmax-rndnu-neon-c8.c | 54 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local 57 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() 89 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local 92 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() 148 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local 151 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() 199 const int8x8_t vi5x01234567 = vld1_s8(i5); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local 202 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
|
D | 7p7x-minmax-fp32-neonv8-c8.c | 55 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local 58 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() 90 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local 93 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() 147 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local 150 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() 198 const int8x8_t vi5x01234567 = vld1_s8(i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local 201 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
|
D | 7x-minmax-fp32-sse2-c8.c | 87 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() local 91 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 153 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() local 162 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8()
|
D | 7p7x-minmax-fp32-sse2-c24.c | 91 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 101 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() 171 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 180 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() 260 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 270 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() 340 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 349 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() 449 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 459 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() [all …]
|
D | 7p7x-minmax-fp32-sse2-c16.c | 80 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 87 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 168 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 175 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 276 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 283 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 367 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 376 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
|
D | 7p7x-minmax-rndnu-neon-c24.c | 70 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 77 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 111 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 114 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 165 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 172 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 212 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 215 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 290 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 297 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() [all …]
|
D | 7p7x-minmax-fp32-neon-c24.c | 70 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 77 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 111 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 114 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 165 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 172 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 212 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 215 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 289 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 296 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() [all …]
|
D | 7p7x-minmax-fp32-neonv8-c24.c | 71 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 78 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 112 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 115 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 166 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 173 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 213 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 216 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 289 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 296 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() [all …]
|
/external/XNNPACK/src/f16-gavgpool/gen/ |
D | 7p7x-minmax-neonfp16arith-c16.c | 61 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 66 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() 86 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 89 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() 132 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 137 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() 159 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 162 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() 224 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 229 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() [all …]
|
D | 7p7x-minmax-neonfp16arith-c8.c | 53 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local 56 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 86 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local 89 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 138 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local 141 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 165 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local 168 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
|
D | 7p7x-minmax-neonfp16arith-c24.c | 69 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 76 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() 100 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 103 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() 156 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 163 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() 189 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 192 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() 264 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 271 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() [all …]
|
D | 7p7x-minmax-f16c-c8.c | 54 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local 57 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() 87 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local 90 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() 140 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local 143 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() 168 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local 171 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
|
D | 7p7x-minmax-f16c-c16.c | 62 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 67 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() 87 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 90 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() 133 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 138 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() 160 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 163 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() 227 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 232 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() [all …]
|
D | 7p7x-minmax-neonfp16arith-c32.c | 77 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 86 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 114 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 117 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 180 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 189 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 219 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 222 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 304 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 313 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() [all …]
|
D | 7x-minmax-neonfp16arith-c8.c | 70 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() local 73 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() 96 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() local 99 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8()
|
D | 7x-minmax-f16c-c8.c | 77 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8() local 82 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8() 106 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8() local 109 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8()
|
/external/XNNPACK/src/qu8-gavgpool/gen/ |
D | 7p7x-minmax-rndnu-neon-c8.c | 54 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local 57 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() 89 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local 92 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() 148 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local 151 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() 199 const uint8x8_t vi5x01234567 = vld1_u8(i5); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local 202 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
|
D | 7p7x-minmax-fp32-neonv8-c8.c | 55 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local 58 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() 90 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local 93 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() 147 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local 150 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() 198 const uint8x8_t vi5x01234567 = vld1_u8(i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local 201 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
|
D | 7p7x-minmax-fp32-neon-c8.c | 54 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local 57 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() 89 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local 92 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() 147 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local 150 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() 200 const uint8x8_t vi5x01234567 = vld1_u8(i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local 203 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
|
D | 7p7x-minmax-fp32-sse2-c8.c | 70 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local 74 const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() 130 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local 134 const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() 210 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local 214 const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() 276 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local 285 const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
|
D | 7p7x-minmax-rndnu-neon-c24.c | 70 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 77 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 111 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 114 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 165 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 172 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 212 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 215 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 290 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 297 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() [all …]
|
D | 7p7x-minmax-fp32-neon-c24.c | 70 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 77 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 111 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 114 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 165 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 172 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 212 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 215 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 289 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 296 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() [all …]
|
D | 7p7x-minmax-fp32-neonv8-c24.c | 71 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 78 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 112 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 115 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 166 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 173 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 213 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 216 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 289 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 296 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() [all …]
|