Searched refs:vacc1x4 (results 1 – 25 of 36, sorted by relevance). All hits follow one widen-accumulate / pairwise-reduce pattern; an illustrative sketch follows each of the two result groups below.

/external/XNNPACK/src/qs8-gemm/gen/
2x8c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal)
   62  int32x4_t vacc1x4 = vacc0x4;  [local]
   98  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  125  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  153  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x8c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal)
   62  int32x4_t vacc1x4 = vacc0x4;  [local]
  118  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  173  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  200  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  228  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x8c16-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal)
   62  int32x4_t vacc1x4 = vacc0x4;  [local]
  111  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  141  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  169  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

3x8c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal)
   68  int32x4_t vacc1x4 = vacc0x4;  [local]
  148  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  223  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  257  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  291  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

3x8c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal)
   68  int32x4_t vacc1x4 = vacc0x4;  [local]
  122  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  156  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  190  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

3x8c16-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal)
   68  int32x4_t vacc1x4 = vacc0x4;  [local]
  140  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  180  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  214  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

4x8c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal)
   74  int32x4_t vacc1x4 = vacc0x4;  [local]
  146  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  187  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  227  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x16c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal)
   70  int32x4_t vacc1x4 = vacc0x4;  [local]
  114  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  185  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  235  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

4x8c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal)
   74  int32x4_t vacc1x4 = vacc0x4;  [local]
  178  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  273  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  314  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  354  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x16c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal)
   70  int32x4_t vacc1x4 = vacc0x4;  [local]
  142  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  253  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  324  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  374  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x16c16-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal)
   70  int32x4_t vacc1x4 = vacc0x4;  [local]
  135  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  217  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  267  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

4x8c16-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal)
   74  int32x4_t vacc1x4 = vacc0x4;  [local]
  169  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  219  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  259  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));
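Taken together, the qs8-gemm hits trace one pattern: vacc1x4 starts as a copy of row 0's bias-initialized accumulator, each inner-loop step forms eight int8*int8 products as int16 lanes (vmull_s8, plus a second vmlal_s8 term in the mlal variants), and vpadalq_s16 folds adjacent pairs into the four int32 lanes. Below is a minimal sketch of that step, assuming plain ARM NEON; acc_step and its parameters are illustrative names, not XNNPACK symbols.

#include <arm_neon.h>

/* One widening multiply-accumulate step, as in the vpadalq_s16 hits
 * above: eight int8*int8 products widen to int16, then adjacent pairs
 * are summed into the four int32 accumulator lanes. Illustrative only. */
static int32x4_t acc_step(int32x4_t vacc, const int8_t a[8], const int8_t b[8]) {
  const int16x8_t vprod = vmull_s8(vld1_s8(a), vld1_s8(b));
  return vpadalq_s16(vacc, vprod);  /* vacc[i] += vprod[2*i] + vprod[2*i+1] */
}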
/external/XNNPACK/src/qs8-igemm/gen/
2x8c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal)
   63  int32x4_t vacc1x4 = vacc0x4;  [local]
  131  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  186  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  216  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  244  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x8c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal)
   63  int32x4_t vacc1x4 = vacc0x4;  [local]
  111  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  141  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  169  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x8c16-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal)
   63  int32x4_t vacc1x4 = vacc0x4;  [local]
  124  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  157  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  185  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

3x8c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal)
   67  int32x4_t vacc1x4 = vacc0x4;  [local]
  137  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  174  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  208  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

3x8c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal)
   67  int32x4_t vacc1x4 = vacc0x4;  [local]
  163  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  238  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  275  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  309  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

3x8c16-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal)
   67  int32x4_t vacc1x4 = vacc0x4;  [local]
  155  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  198  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  232  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

4x8c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal)
   71  int32x4_t vacc1x4 = vacc0x4;  [local]
  163  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  207  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  247  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x16c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal)
   71  int32x4_t vacc1x4 = vacc0x4;  [local]
  127  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  201  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  251  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x16c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal)
   71  int32x4_t vacc1x4 = vacc0x4;  [local]
  155  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  266  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  340  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  390  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

4x8c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal)
   71  int32x4_t vacc1x4 = vacc0x4;  [local]
  195  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  290  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  334  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  374  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

4x8c16-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal)
   71  int32x4_t vacc1x4 = vacc0x4;  [local]
  186  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  239  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  279  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

2x16c16-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal)
   71  int32x4_t vacc1x4 = vacc0x4;  [local]
  148  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  233  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  283  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));

3x16c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal)
   75  int32x4_t vacc1x4 = vacc0x4;  [local]
  161  vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4);
  258  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
  320  const int32x2_t vpsum1x4 = vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4));
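The qs8-igemm variants end with the same reduction tail: per-channel int32x4_t accumulators are either folded pairwise with vpaddq_s32 (the vsum1x45 hits, an AArch64 instruction) or split with vget_low_s32/vget_high_s32 and summed with vadd_s32 (the vpsum1x4 hits, which also work on 32-bit ARM). Below is a hedged sketch of the latter for a single accumulator; reduce_acc is an illustrative name, not an XNNPACK symbol.

#include <arm_neon.h>

/* Horizontal sum of one accumulator, mirroring the
 * vadd_s32(vget_low_s32(vacc1x4), vget_high_s32(vacc1x4)) hits. */
static int32_t reduce_acc(int32x4_t vacc) {
  const int32x2_t vsum = vadd_s32(vget_low_s32(vacc), vget_high_s32(vacc));
  return vget_lane_s32(vpadd_s32(vsum, vsum), 0);  /* total of all four lanes */
}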
