/external/XNNPACK/src/qs8-gemm/gen/
D | 1x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup()
      57  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      58  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      59  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     105  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     106  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     107  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 2x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup()
      69  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      70  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      71  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     142  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     143  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     144  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 1x16-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mull_addw_dup()
      64  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      65  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      66  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     152  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     153  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     154  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 3x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup()
      81  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      82  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      83  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     179  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     180  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     181  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 4x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup()
      93  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      94  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      95  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     216  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     217  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     218  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 2x16-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup()
      81  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      82  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      83  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     218  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     219  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     220  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 3x16-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup()
      98  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      99  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     100  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     284  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     285  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     286  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 4x16-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
     115  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     116  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     117  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     350  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     351  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     352  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

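Every hit above is the same generated pattern: the kernel broadcasts one lane of the A vector, does a signed 8-bit multiply-long against an 8-byte column group of B, then folds the 16-bit products into 32-bit accumulators with widening adds. A minimal sketch of that step follows, assuming ARM NEON; the helper name is hypothetical and not an XNNPACK API.

#include <arm_neon.h>

// Hypothetical helper illustrating the "mull_addw_dup" step named in the
// kernel filenames above. vb_column is one 8-byte column group of B; lane 1
// of va is broadcast to match the vprod0x01234567c1 lines in the listing.
static inline void mull_addw_dup_step(int32x4_t* vacc0123,
                                      int32x4_t* vacc4567,
                                      int8x8_t va, int8x8_t vb_column) {
  // int8 x int8 -> int16x8: multiply-long against a broadcast lane of A.
  const int16x8_t vprod = vmull_s8(vb_column, vdup_lane_s8(va, 1));
  // Widening adds accumulate the 16-bit products into 32-bit lanes.
  *vacc0123 = vaddw_s16(*vacc0123, vget_low_s16(vprod));
  *vacc4567 = vaddw_s16(*vacc4567, vget_high_s16(vprod));
}
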
/external/XNNPACK/src/qs8-igemm/gen/
D | 1x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup()
      68  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      69  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      70  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     116  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     117  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     118  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 2x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup()
      82  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      83  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      84  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     155  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     156  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     157  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 1x16-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mull_addw_dup()
      75  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      76  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      77  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     163  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     164  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     165  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 3x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup()
      96  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      97  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      98  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     194  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     195  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     196  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 4x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup()
     110  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     111  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     112  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     233  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     234  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     235  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 2x16-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup()
      94  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
      95  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
      96  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     231  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     232  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     233  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 3x16-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup()
     113  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     114  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     115  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     299  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     300  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     301  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

D | 4x16-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
     132  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     133  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     134  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
     367  const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1));  local
     368  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
     369  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));

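The igemm hits are the identical arithmetic; those kernels differ only in fetching A rows through an indirection buffer of row pointers. The generators emit one such block per lane of va0 because vdup_lane_s8 requires a compile-time lane index. Below is a hedged sketch of the first two lanes; the packed-B layout (8 bytes of B per lane of A, via `w`) is an assumption for illustration, not XNNPACK's documented packing.

#include <arm_neon.h>

// Illustrative unrolling only: the c0/c1 suffixes mirror the generated
// vprod0x01234567cN names in the listing above.
void tile_1x8_kgroup(int32x4_t* vacc0x0123, int32x4_t* vacc0x4567,
                     int8x8_t va0, const int8_t* w) {
  // Lane 0 of A against the first 8-byte column group of B.
  const int16x8_t vprod0x01234567c0 =
      vmull_s8(vld1_s8(w + 0), vdup_lane_s8(va0, 0));
  *vacc0x0123 = vaddw_s16(*vacc0x0123, vget_low_s16(vprod0x01234567c0));
  *vacc0x4567 = vaddw_s16(*vacc0x4567, vget_high_s16(vprod0x01234567c0));

  // Lane 1 of A against the next column group: the exact lines listed above.
  const int16x8_t vprod0x01234567c1 =
      vmull_s8(vld1_s8(w + 8), vdup_lane_s8(va0, 1));
  *vacc0x0123 = vaddw_s16(*vacc0x0123, vget_low_s16(vprod0x01234567c1));
  *vacc0x4567 = vaddw_s16(*vacc0x4567, vget_high_s16(vprod0x01234567c1));
  // Lanes 2..7 repeat the same pattern in the generated files.
}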