/external/XNNPACK/src/f16-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-neonfp16arith-6x8.c | 100 float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() local 134 vo5p0 = vfmaq_lane_f16(vo5p0, vi6x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 140 vo4p0 = vfmaq_lane_f16(vo4p0, vi6x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 150 const float16x8_t vi6x789ABCDE = vextq_f16(vi6x01234567, vi6x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 180 vi6x01234567 = vi6x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 190 const float16x8_t vi6x9ABCDEFG = vextq_f16(vi6x89ABCDEF, vi6xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 220 vi6x89ABCDEF = vi6xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 262 vi6x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi6x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 278 vo5p0 = vfmaq_lane_f16(vo5p0, vi6x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 284 vo4p0 = vfmaq_lane_f16(vo4p0, vi6x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() [all …]
|
D | 3x3p1-minmax-neonfp16arith-5x8.c | 93 float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() local 128 vo4p0 = vfmaq_lane_f16(vo4p0, vi6x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 137 const float16x8_t vi6x789ABCDE = vextq_f16(vi6x01234567, vi6x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 163 vi6x01234567 = vi6x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 172 const float16x8_t vi6x9ABCDEFG = vextq_f16(vi6x89ABCDEF, vi6xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 198 vi6x89ABCDEF = vi6xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 235 vi6x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi6x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 254 vo4p0 = vfmaq_lane_f16(vo4p0, vi6x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 263 const float16x8_t vi6x789ABCDE = vextq_f16(vi6x01234567, vi6x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 291 const float16x8_t vi6x9ABCDEFG = vextq_f16(vi6x89ABCDEF, vzero, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8()
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-sse2-c16.c | 91 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 97 const __m128i vxi6x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi6x89ABCDEF, vi6x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 179 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 185 … const __m128i vxi6x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi6x89ABCDEF, vi6x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 287 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 293 const __m128i vxi6x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi6x89ABCDEF, vi6x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
|
D | 7p7x-minmax-rndnu-neon-c16.c | 68 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 71 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() 120 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 127 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() 198 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 205 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16()
|
D | 7p7x-minmax-fp32-neon-c16.c | 68 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 71 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() 120 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 127 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() 197 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 204 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16()
|
D | 7p7x-minmax-fp32-neonv8-c16.c | 69 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 72 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() 121 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 128 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() 197 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 204 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16()
|
D | 7p7x-minmax-fp32-sse2-c24.c | 105 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 114 const __m128i vxi6x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi6x89ABCDEF, vi6x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() 274 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 283 … const __m128i vxi6x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi6x89ABCDEF, vi6x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() 463 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 472 const __m128i vxi6x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi6x89ABCDEF, vi6x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24()
|
D | 7p7x-minmax-rndnu-neon-c24.c | 78 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 83 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 173 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 182 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 298 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 307 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
|
D | 7p7x-minmax-fp32-neon-c24.c | 78 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 83 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 173 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 182 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 297 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 306 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
|
D | 7p7x-minmax-fp32-neonv8-c24.c | 79 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 84 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 174 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 183 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 297 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 306 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
|
D | 7x-minmax-fp32-sse2-c16.c | 109 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c16() local 115 const __m128i vxi6x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi6x89ABCDEF, vi6x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c16()
|
D | 7p7x-minmax-rndnu-neon-c32.c | 88 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 95 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 200 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 211 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 344 const int8x8_t vi6x89ABCDEF = vld1_s8(i6); i6 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 355 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
|
/external/XNNPACK/src/f16-gavgpool/gen/ |
D | 7p7x-minmax-neonfp16arith-c16.c | 67 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 70 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() 138 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 141 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() 230 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 233 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
|
D | 7p7x-minmax-neonfp16arith-c24.c | 77 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 82 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() 164 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 169 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() 272 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 277 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
|
D | 7p7x-minmax-f16c-c16.c | 68 const __m256 vi6x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i6)); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 71 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi6x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() 139 const __m256 vi6x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i6)); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 142 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi6x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() 233 const __m256 vi6x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i6)); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 236 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi6x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
|
D | 7p7x-minmax-neonfp16arith-c32.c | 87 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 94 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 190 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 197 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 314 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 321 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
|
D | 7p7x-minmax-f16c-c24.c | 78 const __m256 vi6x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i6)); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c24() local 83 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi6x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c24() 165 const __m256 vi6x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i6)); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c24() local 170 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi6x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c24() 275 const __m256 vi6x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i6)); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c24() local 280 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi6x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c24()
|
D | 7x-minmax-neonfp16arith-c16.c | 84 const float16x8_t vi6x89ABCDEF = vld1q_f16(i6); i6 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c16() local 87 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi6x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c16()
|
/external/XNNPACK/src/qu8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-neon-c16.c | 68 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 71 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() 120 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 127 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() 197 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 204 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16()
|
D | 7p7x-minmax-rndnu-neon-c16.c | 68 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 71 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() 120 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 127 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() 198 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 205 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16()
|
D | 7p7x-minmax-fp32-neonv8-c16.c | 69 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 72 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() 121 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 128 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() 197 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 204 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16()
|
D | 7p7x-minmax-fp32-sse2-c16.c | 92 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 98 const __m128i vxi6x89ABCDEF = _mm_unpacklo_epi8(vi6x89ABCDEF, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 178 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 184 const __m128i vxi6x89ABCDEF = _mm_unpacklo_epi8(vi6x89ABCDEF, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 284 const __m128i vi6x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i6 + 8)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 290 const __m128i vxi6x89ABCDEF = _mm_unpacklo_epi8(vi6x89ABCDEF, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
|
D | 7p7x-minmax-rndnu-neon-c24.c | 78 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 83 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 173 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 182 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 298 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 307 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
|
D | 7p7x-minmax-fp32-neon-c24.c | 78 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 83 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 173 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 182 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() 297 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local 306 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
|
D | 7p7x-minmax-fp32-neonv8-c24.c | 79 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 84 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 174 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 183 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() 297 const uint8x8_t vi6x89ABCDEF = vld1_u8(i6); i6 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local 306 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi6x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
|