/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c2-minmax-rndnu-neon-mlal-ld2r.c |
    130 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() local
    134 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
    138 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
    142 vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
    146 vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-dup.c |
    128 const int8x8_t va0c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() local
    132 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
    136 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
    140 vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
    144 vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld1r.c |
    134 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va03x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() local
    138 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
    142 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
    146 vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
    150 vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld4r.c |
    128 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va0x1.val[3]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() local
    132 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
    136 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
    140 vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
    144 vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c |
    95 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va0x1.val[3]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() local
    99 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
    103 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c |
    101 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va03x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local
    105 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
    109 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
D | 1x8c2-minmax-fp32-neon-mlal-ld4r.c |
    94 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va0x1.val[3]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r() local
    98 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r()
    102 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r()
|
D | 1x8c2-minmax-rndnu-neon-mlal-dup.c |
    94 const int8x8_t va0c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup() local
    98 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup()
    102 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup()
|
D | 1x8c2-minmax-rndnu-neon-mlal-ld2r.c |
    96 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r() local
    100 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r()
    104 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neon-mlal-dup.c |
    94 const int8x8_t va0c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup() local
    98 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup()
    102 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-dup.c |
    95 const int8x8_t va0c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup() local
    99 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup()
    103 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c2-minmax-rndnu-neon-mlal-ld2r.c |
    142 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() local
    146 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
    150 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
    154 vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
    158 vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld1r.c |
    146 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va03x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() local
    150 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
    154 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
    158 vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
    162 vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-dup.c |
    140 const int8x8_t va0c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() local
    144 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
    148 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
    152 vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
    156 vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld4r.c |
    140 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va0x1.val[3]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() local
    144 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
    148 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
    152 vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
    156 vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, va0c3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neon-mlal-ld2r.c |
    108 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() local
    112 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r()
    116 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c |
    113 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va03x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local
    117 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
    121 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-dup.c |
    107 const int8x8_t va0c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3)); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup() local
    111 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup()
    115 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c |
    107 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va0x1.val[3]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() local
    111 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
    115 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neon-mlal-dup.c |
    106 const int8x8_t va0c3x1 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3)); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup() local
    110 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup()
    114 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld2r.c |
    109 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() local
    113 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r()
    117 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c |
    107 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va0x1.val[3]); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() local
    111 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
    115 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c |
    113 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va03x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local
    117 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
    121 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 1x8c2-minmax-fp32-neon-mlal-ld4r.c |
    94 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va0x1.val[3]); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r() local
    98 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r()
    102 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c |
    101 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va03x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local
    105 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
    109 vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, va0c3x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
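Every hit above follows the same two-step NEON pattern: the A bytes for column pair c3 are broadcast as a 16-bit lane (via vdup_lane_s16 in the dup kernels, or loaded pre-duplicated with vld1_dup_s16/vld2_dup_s16/vld4_dup_s16 in the ld1r/ld2r/ld4r kernels), reinterpreted back to int8x8_t as va0c3x1, and fed into vmlal_s8 widening multiply-accumulates against the packed B columns. The sketch below isolates that pattern only; it is not the generated XNNPACK kernel, and the helper name accumulate_c3 is hypothetical.

#include <arm_neon.h>

/*
 * Minimal sketch (assumption: "dup" broadcast variant) of the pattern the
 * cross-references above share.  The ld1r/ld2r/ld4r kernels obtain the
 * duplicated lane directly from vld1_dup_s16/vld2_dup_s16/vld4_dup_s16
 * instead of using vdup_lane_s16.
 */
static inline int16x8_t accumulate_c3(int16x8_t vprod0x0123c3,
                                      int8x8_t va0x1,
                                      int8x8_t vb0123c3x1) {
  /* View the 8 A bytes as four int16 pairs and duplicate pair 3 (c3). */
  const int8x8_t va0c3x1 =
      vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3));
  /* int8 x int8 products widened and added into the int16 accumulators. */
  return vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1);
}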