Home
last modified time | relevance | path

Searched refs:va2x0 (Results 1 – 16 of 16) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c8-minmax-neon-mlal-padal.c112 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
135 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
145 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
155 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
165 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
175 int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
185 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
195 int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
205 int16x8_t vprod2x7 = vmull_s8(vb7x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
215 int16x8_t vprod2x8 = vmull_s8(vb8x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D3x16c2-minmax-neon-mlal-padal-dup.c76 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local
98 …2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
108 …2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
118 …2x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
128 …2xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
138 …2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
148 …2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
158 …2x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
168 …2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
178 …2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
[all …]
D3x8c8-minmax-neon-mlal-padal.c88 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
103 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
113 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
123 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
133 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
143 int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
153 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
163 int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
173 int16x8_t vprod2x7 = vmull_s8(vb7x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c2-minmax-neon-mlal-padal-dup.c70 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local
84 …2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
94 …2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
104 …2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
114 …2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
124 …2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
134 …2x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
144 …2x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 3))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
154 …2x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 3))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
D4x16c2-minmax-neon-mlal-padal-dup.c86 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local
110 …2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
123 …2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
136 …2x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
149 …2xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
162 …2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
175 …2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
188 …2x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
201 …2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
214 …2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
[all …]
D4x16c8-minmax-neon-mlal-padal.c134 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
159 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
172 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
185 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
198 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
211 int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
224 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
237 int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
250 int16x8_t vprod2x7 = vmull_s8(vb7x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
263 int16x8_t vprod2x8 = vmull_s8(vb8x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
[all …]
D4x8c8-minmax-neon-mlal-padal.c102 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
119 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
132 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
145 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
158 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
171 int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
184 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
197 int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
210 int16x8_t vprod2x7 = vmull_s8(vb7x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c2-minmax-neon-mlal-padal-dup.c78 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() local
94 …2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
107 …2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
120 …2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
133 …2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
146 …2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
159 …2x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
172 …2x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 3))); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
185 …2x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 3))); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c2-minmax-neon-mlal-padal-dup.c91 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local
113 …2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
123 …2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
133 …2x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
143 …2xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
153 …2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
163 …2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
173 …2x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
183 …2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
193 …2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
[all …]
D3x16c8-minmax-neon-mlal-padal.c127 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
150 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
160 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
170 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
180 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
190 int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
200 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
210 int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
220 int16x8_t vprod2x7 = vmull_s8(vb7x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
230 int16x8_t vprod2x8 = vmull_s8(vb8x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mlal-padal.c103 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
118 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
128 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
138 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
148 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
158 int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
168 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
178 int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
188 int16x8_t vprod2x7 = vmull_s8(vb7x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c2-minmax-neon-mlal-padal-dup.c85 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local
99 …2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
109 …2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
119 …2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
129 …2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
139 …2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
149 …2x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
159 …2x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 3))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
169 …2x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 3))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
D4x16c2-minmax-neon-mlal-padal-dup.c103 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local
127 …2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
140 …2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
153 …2x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
166 …2xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
179 …2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
192 …2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
205 …2x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
218 …2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
231 …2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
[all …]
D4x16c8-minmax-neon-mlal-padal.c151 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
176 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
189 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
202 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
215 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
228 int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
241 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
254 int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
267 int16x8_t vprod2x7 = vmull_s8(vb7x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
280 int16x8_t vprod2x8 = vmull_s8(vb8x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
[all …]
D4x8c2-minmax-neon-mlal-padal-dup.c95 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup() local
111 …2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
124 …2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
137 …2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
150 …2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
163 …2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
176 …2x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 2))); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
189 …2x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 3))); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
202 …2x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 3))); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
D4x8c8-minmax-neon-mlal-padal.c119 const int8x8_t va2x0 = vld1_s8(a2); a2 += 8; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
136 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
149 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
162 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
175 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
188 int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
201 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
214 int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
227 int16x8_t vprod2x7 = vmull_s8(vb7x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()