Home
last modified time | relevance | path

Searched refs:vb0123xCDEF (Results 1 – 25 of 42) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D8x16c4-minmax-rndnu-neondot.c140 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local
150 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
154 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
158 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
162 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
166 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
170 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
174 vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
178 vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
230 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local
[all …]
D6x16c4-minmax-rndnu-neondot.c118 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local
128 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
132 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
136 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
140 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
144 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
148 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
190 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local
196 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
200 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
[all …]
D4x16c4-minmax-rndnu-neondot.c96 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local
106 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
110 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
114 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
118 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
150 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local
156 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
160 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
164 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
168 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
/external/XNNPACK/src/qc8-igemm/gen/
D8x16c4-minmax-fp32-neondot.c163 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() local
173 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
177 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
181 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
185 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
189 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
193 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
197 vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
201 vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
253 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() local
[all …]
D6x16c4-minmax-fp32-neondot.c137 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() local
147 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
151 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
155 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
159 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
163 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
167 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
209 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() local
215 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
219 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
[all …]
D4x16c4-minmax-fp32-neondot.c111 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local
121 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
125 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
129 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
133 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
165 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local
171 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
175 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
179 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
183 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
/external/XNNPACK/src/qc8-gemm/gen/
D8x16c4-minmax-fp32-neondot.c141 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() local
151 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
155 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
159 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
163 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
167 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
171 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
175 vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
179 vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
231 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() local
[all …]
D6x16c4-minmax-fp32-neondot.c119 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() local
129 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
133 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
137 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
141 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
145 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
149 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
191 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() local
197 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
201 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
[all …]
D4x16c4-minmax-fp32-neondot.c97 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local
107 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
111 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
115 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
119 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
151 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local
157 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
161 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
165 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
169 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
/external/XNNPACK/src/qs8-igemm/gen/
D8x16c4-minmax-rndnu-neondot.c162 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local
172 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
176 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
180 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
184 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
188 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
192 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
196 vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
200 vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
252 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local
[all …]
D6x16c4-minmax-rndnu-neondot.c136 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local
146 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
150 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
154 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
158 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
162 vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
166 vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
208 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local
214 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
218 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
[all …]
D4x16c4-minmax-rndnu-neondot.c110 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local
120 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
124 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
128 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
132 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
164 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local
170 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
174 vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
178 vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
182 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
/external/XNNPACK/src/qu8-igemm/gen/
D8x16c4-minmax-rndnu-neondot.c174 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local
185 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
194 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
203 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
212 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
221 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
230 vpacc5xCDEF = vdotq_lane_u32(vpacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
239 vpacc6xCDEF = vdotq_lane_u32(vpacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
248 vpacc7xCDEF = vdotq_lane_u32(vpacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
272 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local
[all …]
D5x16c4-minmax-rndnu-neondot.c132 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() local
143 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot()
152 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot()
161 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot()
170 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot()
179 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot()
200 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot() local
207 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot()
212 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot()
217 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot()
[all …]
D4x16c4-minmax-fp32-neondot.c119 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local
130 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
139 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
148 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
157 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
177 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot() local
184 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
189 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
194 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
199 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot()
D4x16c4-minmax-rndnu-neondot.c118 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local
129 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
138 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
147 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
156 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
176 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot() local
183 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
188 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
193 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
198 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot()
D6x16c4-minmax-rndnu-neondot.c146 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local
157 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
166 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
175 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
184 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
193 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
202 vpacc5xCDEF = vdotq_lane_u32(vpacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
224 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local
231 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
236 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
[all …]
D3x16c4-minmax-rndnu-neondot.c104 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() local
115 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
124 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
133 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
152 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot() local
159 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
164 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
169 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot()
D2x16c4-minmax-fp32-neondot.c91 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() local
102 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot()
111 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot()
129 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot() local
136 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot()
141 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot()
/external/XNNPACK/src/qu8-gemm/gen/
D8x16c4-minmax-rndnu-neondot.c150 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local
161 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
170 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
179 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
188 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
197 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
206 vpacc5xCDEF = vdotq_lane_u32(vpacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
215 vpacc6xCDEF = vdotq_lane_u32(vpacc6xCDEF, vb0123xCDEF, va6x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
224 vpacc7xCDEF = vdotq_lane_u32(vpacc7xCDEF, vb0123xCDEF, va7x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
248 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local
[all …]
D5x16c4-minmax-rndnu-neondot.c114 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() local
125 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot()
134 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot()
143 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot()
152 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot()
161 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot()
182 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot() local
189 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot()
194 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot()
199 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot()
[all …]
D4x16c4-minmax-fp32-neondot.c103 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local
114 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
123 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
132 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
141 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
161 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot() local
168 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
173 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
178 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
183 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__neondot()
D4x16c4-minmax-rndnu-neondot.c102 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local
113 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
122 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
131 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
140 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
160 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local
167 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
172 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
177 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
182 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
D6x16c4-minmax-rndnu-neondot.c126 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local
137 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
146 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
155 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
164 vpacc3xCDEF = vdotq_lane_u32(vpacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
173 vpacc4xCDEF = vdotq_lane_u32(vpacc4xCDEF, vb0123xCDEF, va4x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
182 vpacc5xCDEF = vdotq_lane_u32(vpacc5xCDEF, vb0123xCDEF, va5x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
204 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local
211 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
216 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
[all …]
D3x16c4-minmax-rndnu-neondot.c90 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() local
101 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()
110 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()
119 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()
138 const uint8x16_t vb0123xCDEF = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot() local
145 vpacc0xCDEF = vdotq_lane_u32(vpacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()
150 vpacc1xCDEF = vdotq_lane_u32(vpacc1xCDEF, vb0123xCDEF, va1x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()
155 vpacc2xCDEF = vdotq_lane_u32(vpacc2xCDEF, vb0123xCDEF, va2x01234567, 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot()

12