Home
last modified time | relevance | path

Searched refs:vi5x01234567 (Results 1 – 25 of 365) sorted by relevance

12345678910>>...15

/external/XNNPACK/src/qs8-gavgpool/gen/
D7p7x-minmax-fp32-sse2-c8.c69 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
73 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
130 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
134 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
211 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
215 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
278 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
287 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
D7p7x-minmax-fp32-neon-c8.c54 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
57 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
89 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
92 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
147 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
150 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
200 const int8x8_t vi5x01234567 = vld1_s8(i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
203 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
D7p7x-minmax-rndnu-neon-c8.c54 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
57 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
89 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
92 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
148 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
151 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
199 const int8x8_t vi5x01234567 = vld1_s8(i5); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
202 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
D7p7x-minmax-fp32-neonv8-c8.c55 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
58 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
90 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
93 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
147 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
150 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
198 const int8x8_t vi5x01234567 = vld1_s8(i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
201 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
D7x-minmax-fp32-sse2-c8.c87 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() local
91 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8()
153 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() local
162 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8()
D7p7x-minmax-fp32-sse2-c24.c91 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local
101 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24()
171 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local
180 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24()
260 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local
270 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24()
340 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local
349 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24()
449 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local
459 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24()
[all …]
D7p7x-minmax-fp32-sse2-c16.c80 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local
87 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
168 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local
175 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
276 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local
283 const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
367 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local
376 … const __m128i vxi5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi5x01234567, vi5x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
D7p7x-minmax-rndnu-neon-c24.c70 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
77 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
111 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
114 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
165 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
172 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
212 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
215 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
290 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
297 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
[all …]
D7p7x-minmax-fp32-neon-c24.c70 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
77 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
111 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
114 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
165 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
172 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
212 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
215 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
289 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
296 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
[all …]
D7p7x-minmax-fp32-neonv8-c24.c71 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
78 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
112 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
115 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
166 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
173 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
213 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
216 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
289 const int8x8_t vi5x01234567 = vld1_s8(i5); i5 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
296 vsum01234567 = vaddw_s8(vsum01234567, vi5x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
[all …]
/external/XNNPACK/src/f16-gavgpool/gen/
D7p7x-minmax-neonfp16arith-c16.c61 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
66 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
86 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
89 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
132 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
137 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
159 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
162 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
224 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
229 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
[all …]
D7p7x-minmax-neonfp16arith-c8.c53 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
56 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
86 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
89 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
138 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
141 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
165 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
168 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
D7p7x-minmax-neonfp16arith-c24.c69 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
76 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
100 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
103 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
156 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
163 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
189 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
192 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
264 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
271 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
[all …]
D7p7x-minmax-f16c-c8.c54 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local
57 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
87 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local
90 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
140 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local
143 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
168 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local
171 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
D7p7x-minmax-f16c-c16.c62 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
67 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
87 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
90 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
133 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
138 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
160 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
163 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
227 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
232 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
[all …]
D7p7x-minmax-neonfp16arith-c32.c77 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local
86 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
114 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local
117 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
180 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local
189 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
219 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local
222 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
304 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local
313 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
[all …]
D7x-minmax-neonfp16arith-c8.c70 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() local
73 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8()
96 const float16x8_t vi5x01234567 = vld1q_f16(i5); i5 += 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8() local
99 vacc01234567 = vaddq_f16(vacc01234567, vi5x01234567); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8()
D7x-minmax-f16c-c8.c77 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8() local
82 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8()
106 const __m256 vi5x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i5)); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8() local
109 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi5x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8()
/external/XNNPACK/src/qu8-gavgpool/gen/
D7p7x-minmax-rndnu-neon-c8.c54 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
57 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
89 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
92 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
148 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
151 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
199 const uint8x8_t vi5x01234567 = vld1_u8(i5); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
202 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
D7p7x-minmax-fp32-neonv8-c8.c55 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
58 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
90 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
93 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
147 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
150 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
198 const uint8x8_t vi5x01234567 = vld1_u8(i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
201 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
D7p7x-minmax-fp32-neon-c8.c54 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
57 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
89 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
92 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
147 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
150 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
200 const uint8x8_t vi5x01234567 = vld1_u8(i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
203 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
D7p7x-minmax-fp32-sse2-c8.c70 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
74 const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
130 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
134 const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
210 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
214 const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
276 const __m128i vi5x01234567 = _mm_loadl_epi64((const __m128i*) i5); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
285 const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
D7p7x-minmax-rndnu-neon-c24.c70 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
77 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
111 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
114 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
165 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
172 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
212 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
215 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
290 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local
297 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
[all …]
D7p7x-minmax-fp32-neon-c24.c70 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
77 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
111 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
114 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
165 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
172 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
212 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
215 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
289 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24() local
296 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c24()
[all …]
D7p7x-minmax-fp32-neonv8-c24.c71 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
78 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
112 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
115 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
166 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
173 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
213 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
216 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
289 const uint8x8_t vi5x01234567 = vld1_u8(i5); i5 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24() local
296 vsum01234567 = vaddw_u8(vsum01234567, vi5x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c24()
[all …]

12345678910>>...15