Home
last modified time | relevance | path

Searched refs:vi2x01234567 (Results 1 – 25 of 424) sorted by relevance

12345678910>>...17

/external/XNNPACK/src/qs8-gavgpool/gen/
D7p7x-minmax-fp32-sse2-c8.c54 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
58 const __m128i vxi2x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x01234567, vi2x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
115 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
119 … const __m128i vxi2x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x01234567, vi2x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
196 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
200 const __m128i vxi2x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x01234567, vi2x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
264 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8() local
272 … const __m128i vxi2x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x01234567, vi2x01234567), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8()
D7p7x-minmax-fp32-neon-c8.c47 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
51 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
82 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
86 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
140 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
144 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
193 const int8x8_t vi2x01234567 = vld1_s8(i2); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
197 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
D7p7x-minmax-rndnu-neon-c8.c47 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
51 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
82 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
86 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
141 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
145 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
192 const int8x8_t vi2x01234567 = vld1_s8(i2); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
196 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
D7p7x-minmax-fp32-neonv8-c8.c48 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
52 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
83 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
87 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
140 const int8x8_t vi2x01234567 = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
144 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
191 const int8x8_t vi2x01234567 = vld1_s8(i2); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
195 vsum01234567 = vaddw_s8(vsum01234567, vi2x01234567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
/external/XNNPACK/src/f16-gavgpool/gen/
D7p7x-minmax-neonfp16arith-c16.c48 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
54 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
79 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
83 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
120 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
125 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
153 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
156 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
212 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local
217 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
[all …]
D7p7x-minmax-neonfp16arith-c8.c46 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
50 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
80 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
83 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
132 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
135 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
159 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local
162 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
D7p7x-minmax-neonfp16arith-c24.c50 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
58 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
93 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
97 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
138 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
145 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
183 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
186 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
246 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local
253 vacc01234567 = vaddq_f16(vacc01234567, vi2x01234567); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
[all …]
D7p7x-minmax-f16c-c8.c47 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local
51 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
81 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local
84 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
134 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local
137 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
162 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8() local
165 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8()
D7p7x-minmax-f16c-c16.c49 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
55 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
80 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
84 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
121 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
126 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
154 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
157 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
215 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local
220 …vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc01234567), vi2x01234567), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
[all …]
/external/XNNPACK/src/f32-dwconv/gen/
Dup16x3-minmax-avx.c77 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx() local
83 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx()
113 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx() local
117 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx()
143 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx() local
145 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx()
Dup16x3-minmax-fma3.c77 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3() local
83 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3()
113 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3() local
117 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3()
143 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3() local
145 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3()
Dup16x3-minmax-fma3-acc2.c77 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3_acc2() local
83 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3_acc2()
116 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3_acc2() local
120 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3_acc2()
148 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3_acc2() local
150 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x3__fma3_acc2()
Dup16x3-minmax-avx-acc2.c77 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx_acc2() local
83 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx_acc2()
116 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx_acc2() local
120 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx_acc2()
148 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx_acc2() local
150 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x3__avx_acc2()
Dup16x4-minmax-fma3.c82 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local
88 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
127 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local
131 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
163 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local
165 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
Dup16x4-minmax-fma3-acc2.c82 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local
88 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
130 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local
134 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
168 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local
170 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
Dup16x4-minmax-avx-acc2.c82 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local
88 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
130 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local
134 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
168 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local
170 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
Dup16x4-minmax-avx.c82 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local
88 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
127 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local
131 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
163 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local
165 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
/external/XNNPACK/src/f16-dwconv/gen/
Dup16x3-minmax-neonfp16arith-acc2.c75 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2() local
79 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2()
106 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2() local
108 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2()
130 const float16x8_t vi2x01234567 = vld1q_f16(i2); in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2() local
132 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2()
Dup16x3-minmax-neonfp16arith.c75 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith() local
79 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith()
103 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith() local
105 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith()
125 const float16x8_t vi2x01234567 = vld1q_f16(i2); in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith() local
127 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith()
Dup16x4-minmax-neonfp16arith-acc2.c80 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local
84 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
118 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local
120 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
146 const float16x8_t vi2x01234567 = vld1q_f16(i2); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local
148 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
Dup16x4-minmax-neonfp16arith.c80 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local
84 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
115 const float16x8_t vi2x01234567 = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local
117 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
141 const float16x8_t vi2x01234567 = vld1q_f16(i2); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local
143 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
Dup16x3-minmax-fma3.c80 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); in xnn_f16_dwconv_minmax_ukernel_up16x3__fma3() local
86 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x3__fma3()
116 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); in xnn_f16_dwconv_minmax_ukernel_up16x3__fma3() local
120 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x3__fma3()
147 const __m256 vi2x01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); in xnn_f16_dwconv_minmax_ukernel_up16x3__fma3() local
150 …vacc01234567p0 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc0… in xnn_f16_dwconv_minmax_ukernel_up16x3__fma3()
/external/XNNPACK/src/qu8-gavgpool/gen/
D7p7x-minmax-rndnu-neon-c8.c47 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
51 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
82 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
86 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
141 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
145 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
192 const uint8x8_t vi2x01234567 = vld1_u8(i2); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8() local
196 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8()
D7p7x-minmax-fp32-neonv8-c8.c48 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
52 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
83 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
87 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
140 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
144 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
191 const uint8x8_t vi2x01234567 = vld1_u8(i2); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8() local
195 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c8()
D7p7x-minmax-fp32-neon-c8.c47 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
51 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
82 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
86 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
140 const uint8x8_t vi2x01234567 = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
144 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()
193 const uint8x8_t vi2x01234567 = vld1_u8(i2); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8() local
197 vsum01234567 = vaddw_u8(vsum01234567, vi2x01234567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c8()

12345678910>>...17