Home
last modified time | relevance | path

Searched refs: vacc1x5 (Results 1 – 25 of 36) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-minmax-neon-mull-padal.c63 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
103 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
125 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
154 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c8-minmax-neon-mlal-padal.c63 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
125 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
178 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
200 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
229 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c63 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
117 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
141 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
170 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c69 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
158 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
230 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
257 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
292 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c69 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
129 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
156 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
191 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c69 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
149 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
180 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
215 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c75 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
155 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
187 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
228 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c71 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
119 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
185 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
236 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x8c8-minmax-neon-mlal-padal.c75 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
191 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
282 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
314 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
355 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c71 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
149 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
258 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
324 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
375 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c71 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
141 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
217 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
268 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c75 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
181 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
219 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
260 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c8-minmax-neon-mlal-padal.c64 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
138 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
191 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
216 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
245 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c8-minmax-neon-mull-padal.c64 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
116 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
141 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
170 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c64 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
130 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
157 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
186 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c68 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
144 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
174 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
209 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c68 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
173 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
245 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
275 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
310 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c16-minmax-neon-mlal-padal.c68 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
164 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
198 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
233 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c72 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
172 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
207 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
248 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c72 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
132 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
201 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
252 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c72 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
162 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
271 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
340 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
391 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c72 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
208 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
299 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
334 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
375 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c72 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
198 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
239 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
280 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c72 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
154 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
233 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
284 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c76 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
168 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
258 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
321 const int32x2_t vpsum1x5 = vadd_s32(vget_low_s32(vacc1x5), vget_high_s32(vacc1x5)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()

12