/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 133 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va20x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 138 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 148 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 158 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 168 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 178 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 188 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 198 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 208 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 127 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va2x0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 132 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 142 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 152 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 162 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 172 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 182 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 192 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 202 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 127 const int8x8_t va2c0x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 132 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 142 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 152 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 162 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 172 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 182 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 192 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 202 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 3x8c4-minmax-rndnu-neon-mlal-dup.c | 107 const int8x8_t va2c0x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local 112 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 122 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 132 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 142 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
|
D | 3x8c4-minmax-rndnu-neon-mlal-ld1r.c | 113 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va20x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local 118 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 128 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 138 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 148 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
|
D | 3x8c4-minmax-rndnu-neon-mlal-ld2r.c | 107 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va2x0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local 112 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 122 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 132 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 142 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 153 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va20x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 160 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 173 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 186 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 199 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 212 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 225 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 238 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 251 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 145 const int8x8_t va2c0x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 152 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 165 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 178 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 191 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 204 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 217 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 230 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 243 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 145 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va2x0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 152 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 165 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 178 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 191 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 204 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 217 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 230 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 243 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 129 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va20x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 136 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 149 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 162 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 175 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 121 const int8x8_t va2c0x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 128 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 141 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 154 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 167 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 121 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va2x0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 128 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 141 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 154 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 167 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 112 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va2x0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 117 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 127 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 137 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 147 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 157 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 167 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 177 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 187 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 112 const int8x8_t va2c0x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 117 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 127 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 137 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 147 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 157 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 167 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 177 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 187 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 118 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va20x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 123 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 133 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 143 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 153 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 163 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 173 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 183 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 193 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 3x8c4-minmax-rndnu-neon-mlal-ld2r.c | 92 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va2x0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local 97 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 107 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 117 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 127 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
|
D | 3x8c4-minmax-rndnu-neon-mlal-ld1r.c | 98 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va20x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local 103 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 113 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 123 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 133 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
|
D | 3x8c4-minmax-rndnu-neon-mlal-dup.c | 92 const int8x8_t va2c0x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local 97 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 107 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 117 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 127 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 128 const int8x8_t va2c0x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 135 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 148 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 161 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 174 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 187 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 200 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 213 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 226 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 136 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va20x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 143 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 156 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 169 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 182 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 195 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 208 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 221 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 234 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 128 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va2x0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 135 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 148 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 161 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 174 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 187 int16x8_t vprod2x89c0 = vmull_s8(vb89c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 200 int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 213 int16x8_t vprod2xCDc0 = vmull_s8(vbCDc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 226 int16x8_t vprod2xEFc0 = vmull_s8(vbEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 104 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va2x0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 111 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 124 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 137 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 150 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 104 const int8x8_t va2c0x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 111 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 124 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 137 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 150 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 112 const int8x8_t va2c0x0 = vreinterpret_s8_s32(va20x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 119 int16x8_t vprod2x01c0 = vmull_s8(vb01c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 132 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 145 int16x8_t vprod2x45c0 = vmull_s8(vb45c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 158 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c | 98 const int8x8_t va2c0x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 103 int16x8_t vprod2x0123c0 = vmull_s8(vb0123c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 113 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 123 int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 133 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
|