/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c2-minmax-rndnu-neon-mlal-ld1r.c | 210 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va13x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 216 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 223 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 230 vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 237 vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld4r.c | 198 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local 204 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 211 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 218 vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 225 vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-dup.c | 198 const int8x8_t va1c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1x1), 3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local 204 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 211 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 218 vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 225 vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld2r.c | 202 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va11x1.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local 208 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 215 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 222 vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 229 vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld4r.c | 144 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 150 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() 157 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 144 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 150 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 157 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 149 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va11x1.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 155 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 162 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 145 const int8x8_t va1c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1x1), 3)); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 151 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 158 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld2r.c | 148 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va11x1.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local 154 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() 161 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 145 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 151 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 158 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c2-minmax-rndnu-neon-mlal-ld2r.c | 188 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va11x1.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local 194 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 201 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 208 vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 215 vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld1r.c | 196 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va13x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 202 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 209 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 216 vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 223 vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-dup.c | 184 const int8x8_t va1c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1x1), 3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local 190 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 197 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 204 vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 211 vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld4r.c | 184 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local 190 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 197 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 204 vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 211 vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld1r.c | 142 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va13x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local 148 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() 155 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld4r.c | 130 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 136 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() 143 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld1r.c | 142 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va13x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() local 148 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() 155 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 143 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va13x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local 149 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() 156 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 131 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 137 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 144 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 131 const int8x8_t va1c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1x1), 3)); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 137 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 144 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld1r.c | 142 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va13x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local 148 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() 155 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 143 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va13x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local 149 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() 156 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 130 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 136 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 143 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 145 const int8x8_t va1c3x1 = vreinterpret_s8_s16(va1x1.val[3]); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 151 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 158 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 145 const int8x8_t va1c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1x1), 3)); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 151 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 158 vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, va1c3x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
|