/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c2-minmax-neon-mlal-padal-dup.c | 94 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local 98 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() 101 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() 220 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local 224 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() 250 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local 251 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
|
D | 4x8c2-minmax-neon-mlal-padal-dup.c | 107 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() local 112 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() 116 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() 255 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() local 259 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() 302 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() local 303 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
|
D | 3x8c2-minmax-neon-mull-padal-dup.c | 120 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() local 124 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() 150 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() local 151 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
|
D | 4x8c2-minmax-neon-mull-padal-dup.c | 129 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() local 133 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() 176 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() local 177 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
|
D | 3x16c2-minmax-neon-mlal-padal-dup.c | 108 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 112 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 115 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 354 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 358 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 410 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 411 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
|
D | 3x16c2-minmax-neon-mull-padal-dup.c | 166 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 170 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 222 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 223 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup()
|
D | 4x16c2-minmax-neon-mlal-padal-dup.c | 123 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 128 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 132 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 415 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 419 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 504 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 505 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
|
D | 4x16c2-minmax-neon-mull-padal-dup.c | 177 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() local 181 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 266 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() local 267 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c2-minmax-neon-mlal-padal-dup.c | 109 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local 113 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() 116 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() 235 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local 239 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() 265 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local 266 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
|
D | 4x8c2-minmax-neon-mlal-padal-dup.c | 124 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() local 129 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() 133 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() 272 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() local 276 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() 319 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() local 320 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
|
D | 3x8c2-minmax-neon-mull-padal-dup.c | 135 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() local 139 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() 165 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() local 166 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
|
D | 4x8c2-minmax-neon-mull-padal-dup.c | 146 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() local 150 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() 193 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() local 194 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
|
D | 3x16c2-minmax-neon-mlal-padal-dup.c | 123 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 127 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 130 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 369 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 373 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() 425 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local 426 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
|
D | 4x16c2-minmax-neon-mlal-padal-dup.c | 140 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 145 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 149 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 432 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 436 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() 521 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local 522 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
|
D | 3x16c2-minmax-neon-mull-padal-dup.c | 181 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 185 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 237 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 238 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup()
|
D | 4x16c2-minmax-neon-mull-padal-dup.c | 194 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() local 198 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 283 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() local 284 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
|