/external/XNNPACK/src/qs8-gemm/gen/:

1x8-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup):
   62  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
   63  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
   64  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  112  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  113  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  114  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

2x8-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup):
   77  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
   78  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
   79  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  152  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  153  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  154  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

1x16-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mull_addw_dup):
   74  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
   75  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
   76  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  163  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  164  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  165  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

3x8-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup):
   92  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
   93  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
   94  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  192  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  193  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  194  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

4x8-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup):
  107  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  108  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  109  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  232  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  233  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  234  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

2x16-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup):
   97  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
   98  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
   99  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  235  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  236  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  237  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

3x16-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup):
  120  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  121  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  122  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  307  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  308  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  309  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

4x16-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup):
  143  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  144  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  145  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  379  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  380  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  381  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

/external/XNNPACK/src/qs8-igemm/gen/:

1x8-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup):
   73  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
   74  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
   75  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  123  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  124  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  125  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

2x8-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup):
   90  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
   91  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
   92  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  165  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  166  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  167  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

1x16-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mull_addw_dup):
   85  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
   86  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
   87  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  174  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  175  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  176  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

3x8-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup):
  107  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  108  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  109  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  207  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  208  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  209  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

4x8-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup):
  124  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  125  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  126  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  249  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  250  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  251  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

2x16-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup):
  110  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  111  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  112  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  248  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  249  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  250  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

3x16-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup):
  135  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  136  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  137  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  322  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  323  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  324  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));

4x16-minmax-rndnu-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup):
  160  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  161  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  162  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  396  const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
  397  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
  398  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
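
Every match above is the same three-intrinsic step that gives these generated kernels their mull-addw-dup suffix: vdup_lane_s8 broadcasts one lane of the int8 activation vector (the "dup"), vmull_s8 widening-multiplies it against eight int8 weights into an int16x8_t product (the "mull"), and vaddw_s16 widening-accumulates the low and high int16 halves into two int32x4_t accumulators (the "addw"). The standalone sketch below illustrates that step outside XNNPACK; the input values and the demo framing are illustrative assumptions, not code from the library, and it must be built for a NEON-capable ARM target.

    // Minimal sketch of the mull-addw-dup accumulation step (assumed demo
    // inputs; not XNNPACK source). Build for a NEON target, e.g. AArch64 gcc.
    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const int8_t a[8] = {1, 2, 3, 4, 5, 6, 7, 8};  // one row of activations
      const int8_t b[8] = {1, 1, 1, 1, 2, 2, 2, 2};  // one group of 8 weights
      const int8x8_t va0 = vld1_s8(a);
      const int8x8_t vb01234567c2 = vld1_s8(b);
      int32x4_t vacc0x0123 = vdupq_n_s32(0);         // output columns 0..3
      int32x4_t vacc0x4567 = vdupq_n_s32(0);         // output columns 4..7

      // "dup" + "mull": broadcast lane 2 of the activation vector and
      // widening-multiply it against the 8 weights: int8 x int8 -> int16x8.
      const int16x8_t vprod = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2));
      // "addw": widening-accumulate each int16 half into int32 accumulators.
      vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod));
      vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod));

      int32_t out[8];
      vst1q_s32(out, vacc0x0123);
      vst1q_s32(out + 4, vacc0x4567);
      for (int i = 0; i < 8; i++) printf("%d ", out[i]);
      printf("\n");  // a[2] = 3, so this prints: 3 3 3 3 6 6 6 6
      return 0;
    }

Splitting the int16x8_t product before the widening add mirrors the kernels above: each int32x4_t accumulator holds four output columns, so the eight columns produced by one 8-wide weight vector need two vaddw_s16 steps (vacc0x0123 and vacc0x4567). The "c2" in the variable names marks lane 2 of the activation vector; the generated kernels repeat this step for each lane and, in the wider 1x16/2x16/3x16/4x16 variants, for a second set of eight weight columns.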