/external/XNNPACK/src/q8-igemm/ |
D | 8x8-neon.c | 86 int32x4_t vacc7x4567 = vacc0x4567; in xnn_q8_igemm_ukernel_8x8__neon() local 162 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa7), 0); in xnn_q8_igemm_ukernel_8x8__neon() 184 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa7), 1); in xnn_q8_igemm_ukernel_8x8__neon() 206 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa7), 2); in xnn_q8_igemm_ukernel_8x8__neon() 228 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa7), 3); in xnn_q8_igemm_ukernel_8x8__neon() 250 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_high_s16(vxa7), 0); in xnn_q8_igemm_ukernel_8x8__neon() 272 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_high_s16(vxa7), 1); in xnn_q8_igemm_ukernel_8x8__neon() 294 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_high_s16(vxa7), 2); in xnn_q8_igemm_ukernel_8x8__neon() 316 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_high_s16(vxa7), 3); in xnn_q8_igemm_ukernel_8x8__neon() 358 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa7), 0); in xnn_q8_igemm_ukernel_8x8__neon() [all …]
|
/external/XNNPACK/src/q8-gemm/ |
D | 8x8-neon.c | 97 int32x4_t vacc7x4567 = vacc0x4567; in xnn_q8_gemm_ukernel_8x8__neon() local 136 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa7), 0); in xnn_q8_gemm_ukernel_8x8__neon() 156 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa7), 1); in xnn_q8_gemm_ukernel_8x8__neon() 176 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa7), 2); in xnn_q8_gemm_ukernel_8x8__neon() 196 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa7), 3); in xnn_q8_gemm_ukernel_8x8__neon() 216 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa7), 0); in xnn_q8_gemm_ukernel_8x8__neon() 236 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa7), 1); in xnn_q8_gemm_ukernel_8x8__neon() 256 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa7), 2); in xnn_q8_gemm_ukernel_8x8__neon() 276 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa7), 3); in xnn_q8_gemm_ukernel_8x8__neon() 316 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa7), 0); in xnn_q8_gemm_ukernel_8x8__neon() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 8x8s4-neon.c | 102 float32x4_t vacc7x4567 = vld1q_f32(acc); acc += 4; in xnn_f32_gemminc_ukernel_8x8s4__neon() local 134 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 163 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c1); in xnn_f32_gemminc_ukernel_8x8s4__neon() 192 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c2); in xnn_f32_gemminc_ukernel_8x8s4__neon() 221 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c3); in xnn_f32_gemminc_ukernel_8x8s4__neon() 256 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567); in xnn_f32_gemminc_ukernel_8x8s4__neon() 277 vacc7x4567 = vminq_f32(vacc7x4567, vmax); in xnn_f32_gemminc_ukernel_8x8s4__neon() 295 vacc7x4567 = vmaxq_f32(vacc7x4567, vmin); in xnn_f32_gemminc_ukernel_8x8s4__neon() 299 vst1q_f32(c7 + 4, vacc7x4567); in xnn_f32_gemminc_ukernel_8x8s4__neon() 345 vacc7x0123 = vacc7x4567; in xnn_f32_gemminc_ukernel_8x8s4__neon()
|
D | 8x8s4-neonfma.c | 102 float32x4_t vacc7x4567 = vld1q_f32(acc); acc += 4; in xnn_f32_gemminc_ukernel_8x8s4__neonfma() local 134 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c0); in xnn_f32_gemminc_ukernel_8x8s4__neonfma() 163 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c1); in xnn_f32_gemminc_ukernel_8x8s4__neonfma() 192 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c2); in xnn_f32_gemminc_ukernel_8x8s4__neonfma() 221 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c3); in xnn_f32_gemminc_ukernel_8x8s4__neonfma() 256 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567); in xnn_f32_gemminc_ukernel_8x8s4__neonfma() 277 vacc7x4567 = vminq_f32(vacc7x4567, vmax); in xnn_f32_gemminc_ukernel_8x8s4__neonfma() 295 vacc7x4567 = vmaxq_f32(vacc7x4567, vmin); in xnn_f32_gemminc_ukernel_8x8s4__neonfma() 299 vst1q_f32(c7 + 4, vacc7x4567); in xnn_f32_gemminc_ukernel_8x8s4__neonfma() 345 vacc7x0123 = vacc7x4567; in xnn_f32_gemminc_ukernel_8x8s4__neonfma()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 8x8s4-neon.c | 89 float32x4_t vacc7x4567 = vacc0x4567; in xnn_f32_igemm_ukernel_8x8s4__neon() local 165 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 194 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c1); in xnn_f32_igemm_ukernel_8x8s4__neon() 223 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c2); in xnn_f32_igemm_ukernel_8x8s4__neon() 252 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c3); in xnn_f32_igemm_ukernel_8x8s4__neon() 287 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567); in xnn_f32_igemm_ukernel_8x8s4__neon() 312 vacc7x4567 = vminq_f32(vacc7x4567, vmax); in xnn_f32_igemm_ukernel_8x8s4__neon() 330 vacc7x4567 = vmaxq_f32(vacc7x4567, vmin); in xnn_f32_igemm_ukernel_8x8s4__neon() 334 vst1q_f32(c7 + 4, vacc7x4567); in xnn_f32_igemm_ukernel_8x8s4__neon() 371 vacc7x0123 = vacc7x4567; in xnn_f32_igemm_ukernel_8x8s4__neon()
|
D | 8x8s4-neonfma.c | 89 float32x4_t vacc7x4567 = vacc0x4567; in xnn_f32_igemm_ukernel_8x8s4__neonfma() local 165 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c0); in xnn_f32_igemm_ukernel_8x8s4__neonfma() 194 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c1); in xnn_f32_igemm_ukernel_8x8s4__neonfma() 223 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c2); in xnn_f32_igemm_ukernel_8x8s4__neonfma() 252 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c3); in xnn_f32_igemm_ukernel_8x8s4__neonfma() 287 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567); in xnn_f32_igemm_ukernel_8x8s4__neonfma() 312 vacc7x4567 = vminq_f32(vacc7x4567, vmax); in xnn_f32_igemm_ukernel_8x8s4__neonfma() 330 vacc7x4567 = vmaxq_f32(vacc7x4567, vmin); in xnn_f32_igemm_ukernel_8x8s4__neonfma() 334 vst1q_f32(c7 + 4, vacc7x4567); in xnn_f32_igemm_ukernel_8x8s4__neonfma() 371 vacc7x0123 = vacc7x4567; in xnn_f32_igemm_ukernel_8x8s4__neonfma()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 8x8s4-neonfma.c | 100 float32x4_t vacc7x4567 = vacc0x4567; in xnn_f32_gemm_ukernel_8x8s4__neonfma() local 132 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c0); in xnn_f32_gemm_ukernel_8x8s4__neonfma() 161 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c1); in xnn_f32_gemm_ukernel_8x8s4__neonfma() 190 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c2); in xnn_f32_gemm_ukernel_8x8s4__neonfma() 219 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567c3); in xnn_f32_gemm_ukernel_8x8s4__neonfma() 254 vacc7x4567 = vfmaq_f32(vacc7x4567, va7, vb4567); in xnn_f32_gemm_ukernel_8x8s4__neonfma() 275 vacc7x4567 = vminq_f32(vacc7x4567, vmax); in xnn_f32_gemm_ukernel_8x8s4__neonfma() 293 vacc7x4567 = vmaxq_f32(vacc7x4567, vmin); in xnn_f32_gemm_ukernel_8x8s4__neonfma() 297 vst1q_f32(c7 + 4, vacc7x4567); in xnn_f32_gemm_ukernel_8x8s4__neonfma() 343 vacc7x0123 = vacc7x4567; in xnn_f32_gemm_ukernel_8x8s4__neonfma()
|
D | 8x8s4-neon.c | 100 float32x4_t vacc7x4567 = vacc0x4567; in xnn_f32_gemm_ukernel_8x8s4__neon() local 132 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 161 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c1); in xnn_f32_gemm_ukernel_8x8s4__neon() 190 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c2); in xnn_f32_gemm_ukernel_8x8s4__neon() 219 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567c3); in xnn_f32_gemm_ukernel_8x8s4__neon() 254 vacc7x4567 = vmlaq_f32(vacc7x4567, va7, vb4567); in xnn_f32_gemm_ukernel_8x8s4__neon() 275 vacc7x4567 = vminq_f32(vacc7x4567, vmax); in xnn_f32_gemm_ukernel_8x8s4__neon() 293 vacc7x4567 = vmaxq_f32(vacc7x4567, vmin); in xnn_f32_gemm_ukernel_8x8s4__neon() 297 vst1q_f32(c7 + 4, vacc7x4567); in xnn_f32_gemm_ukernel_8x8s4__neon() 343 vacc7x0123 = vacc7x4567; in xnn_f32_gemm_ukernel_8x8s4__neon()
|
/external/XNNPACK/src/f32-ppmm/gen/ |
D | 8x8-neonfma.c | 81 float32x4_t vacc7x4567 = vacc0x4567; in xnn_f32_ppmm_ukernel_8x8__neonfma() local 107 vacc7x4567 = vfmaq_laneq_f32(vacc7x4567, vb4567, va4567, 3); in xnn_f32_ppmm_ukernel_8x8__neonfma() 133 vacc7x4567 = vfmaq_f32(vacc7x4567, va7777, vb4567); in xnn_f32_ppmm_ukernel_8x8__neonfma() 155 vacc7x4567 = vminq_f32(vacc7x4567, vmax); in xnn_f32_ppmm_ukernel_8x8__neonfma() 173 vacc7x4567 = vmaxq_f32(vacc7x4567, vmin); in xnn_f32_ppmm_ukernel_8x8__neonfma() 177 vst1q_f32(c7 + 4, vacc7x4567); in xnn_f32_ppmm_ukernel_8x8__neonfma() 215 vacc7x0123 = vacc7x4567; in xnn_f32_ppmm_ukernel_8x8__neonfma()
|
D | 8x8-neon.c | 81 float32x4_t vacc7x4567 = vacc0x4567; in xnn_f32_ppmm_ukernel_8x8__neon() local 106 vacc7x4567 = vmlaq_lane_f32(vacc7x4567, vb4567, vget_high_f32(va4567), 1); in xnn_f32_ppmm_ukernel_8x8__neon() 127 vacc7x4567 = vminq_f32(vacc7x4567, vmax); in xnn_f32_ppmm_ukernel_8x8__neon() 145 vacc7x4567 = vmaxq_f32(vacc7x4567, vmin); in xnn_f32_ppmm_ukernel_8x8__neon() 149 vst1q_f32(c7 + 4, vacc7x4567); in xnn_f32_ppmm_ukernel_8x8__neon() 187 vacc7x0123 = vacc7x4567; in xnn_f32_ppmm_ukernel_8x8__neon()
|