/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c2-minmax-neon-mlal-padal-dup.c | 183 …int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 187 …vprod2xCDEFc1 = vmlal_s8(vprod2xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 190 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 386 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 390 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 460 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 461 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
|
D | 4x16c2-minmax-neon-mlal-padal-dup.c | 218 …int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 223 …vprod2xCDEFc1 = vmlal_s8(vprod2xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 227 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 449 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 453 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 564 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 565 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
|
D | 3x16c2-minmax-neon-mull-padal-dup.c | 198 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 202 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 272 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 273 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup()
|
D | 4x16c2-minmax-neon-mull-padal-dup.c | 211 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() local 215 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 326 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() local 327 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c2-minmax-neon-mlal-padal-dup.c | 168 …int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 172 …vprod2xCDEFc1 = vmlal_s8(vprod2xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 175 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 371 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 375 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 445 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 446 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
|
D | 3x16c2-minmax-neon-mull-padal-dup.c | 183 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 187 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 257 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 258 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup()
|
D | 4x16c2-minmax-neon-mlal-padal-dup.c | 201 …int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 206 …vprod2xCDEFc1 = vmlal_s8(vprod2xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 210 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 432 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 436 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 547 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 548 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
|
D | 4x16c2-minmax-neon-mull-padal-dup.c | 194 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() local 198 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 309 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() local 310 vacc2xCDEF = vpadalq_s16(vacc2xCDEF, vprod2xCDEFc1); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
|