/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c2-minmax-rndnu-neon-mlal-ld2r.c | 129 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va01x0.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() local 132 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() 136 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() 140 int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() 144 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-dup.c | 127 const int8x8_t va0c3x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() local 130 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() 134 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() 138 int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() 142 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld1r.c | 133 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va03x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() local 136 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() 140 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() 144 int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() 148 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld4r.c | 127 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va0x0.val[3]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() local 130 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() 134 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() 138 int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() 142 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 94 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va0x0.val[3]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() local 97 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() 101 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 100 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va03x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local 103 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() 107 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
D | 1x8c2-minmax-fp32-neon-mlal-ld4r.c | 93 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va0x0.val[3]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r() local 96 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r() 100 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r()
|
D | 1x8c2-minmax-rndnu-neon-mlal-dup.c | 93 const int8x8_t va0c3x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup() local 96 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup() 100 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup()
|
D | 1x8c2-minmax-rndnu-neon-mlal-ld2r.c | 95 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va01x0.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r() local 98 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r() 102 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neon-mlal-dup.c | 93 const int8x8_t va0c3x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup() local 96 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup() 100 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-dup.c | 94 const int8x8_t va0c3x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup() local 97 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup() 101 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c2-minmax-rndnu-neon-mlal-ld2r.c | 141 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va01x0.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() local 144 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() 148 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() 152 int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() 156 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld1r.c | 145 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va03x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() local 148 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() 152 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() 156 int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r() 160 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-dup.c | 139 const int8x8_t va0c3x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() local 142 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() 146 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() 150 int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup() 154 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld4r.c | 139 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va0x0.val[3]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() local 142 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() 146 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() 150 int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r() 154 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neon-mlal-ld2r.c | 107 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va01x0.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() local 110 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() 114 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 112 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va03x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local 115 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() 119 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-dup.c | 106 const int8x8_t va0c3x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3)); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup() local 109 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup() 113 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 106 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va0x0.val[3]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() local 109 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() 113 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neon-mlal-dup.c | 105 const int8x8_t va0c3x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3)); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup() local 108 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup() 112 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 108 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va01x0.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() local 111 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() 115 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 106 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va0x0.val[3]); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() local 109 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r() 113 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 112 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va03x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local 115 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() 119 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 1x8c2-minmax-fp32-neon-mlal-ld4r.c | 93 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va0x0.val[3]); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r() local 96 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r() 100 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 100 const int8x8_t va0c3x0 = vreinterpret_s8_s16(va03x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local 103 int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, va0c3x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() 107 int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, va0c3x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|