Home
last modified time | relevance | path

Searched refs: vb01234567c3 (Results 1 – 25 of 70) sorted by relevance

123

/external/XNNPACK/src/f16-gemm/gen/
D8x8-minmax-neonfp16arith-ld64.c197 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
200 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
201 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
202 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
203 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
204 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
205 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
206 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
207 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
218 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
D6x8-minmax-neonfp16arith-ld64.c163 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
166 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
167 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
168 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
169 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
170 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
171 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
180 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
181 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
182 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D4x8-minmax-neonfp16arith-ld64.c129 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
132 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
133 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
134 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
135 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
142 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
143 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
144 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
145 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
D6x16-minmax-neonfp16arith-ld64.c208 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
212 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
213 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
214 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
215 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
216 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
217 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
232 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
233 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
234 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16-minmax-neonfp16arith-ld64.c256 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
260 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
261 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
262 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
263 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
264 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
265 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
266 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
267 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
286 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c160 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
164 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
165 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
166 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
167 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
178 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
179 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
180 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
181 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x8-minmax-neonfp16arith-ld64.c78 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
81 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
85 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
/external/XNNPACK/src/f16-igemm/gen/
D8x8-minmax-neonfp16arith-ld64.c229 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
232 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
233 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
234 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
235 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
236 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
237 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
238 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
239 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
250 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
D6x8-minmax-neonfp16arith-ld64.c189 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
192 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
193 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
194 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
195 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
196 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
197 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
206 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
207 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
208 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D4x8-minmax-neonfp16arith-ld64.c149 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
152 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
153 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
154 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
155 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
162 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
163 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
164 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
165 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
D8x16-minmax-neonfp16arith-ld64.c288 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
292 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
293 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
294 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
295 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
296 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
297 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
298 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
299 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
318 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c234 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
238 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
239 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
240 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
241 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
242 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
243 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
258 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
259 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
260 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c180 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
184 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
185 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
186 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
187 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
198 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
199 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
200 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
201 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x8-minmax-neonfp16arith-ld64.c89 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
92 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
96 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
/external/XNNPACK/src/f16-gemm/gen-inc/
D8x8inc-minmax-neonfp16arith-ld64.c199 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local
202 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
203 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
204 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
205 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
206 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
207 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
208 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
209 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
220 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
D6x8inc-minmax-neonfp16arith-ld64.c165 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64() local
168 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
169 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
170 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
171 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
172 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
173 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
182 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
183 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
184 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D4x8inc-minmax-neonfp16arith-ld64.c131 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64() local
134 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
135 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
136 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
137 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
144 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
145 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
D8x16inc-minmax-neonfp16arith-ld64.c258 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local
262 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
263 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
264 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
265 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
266 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
267 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
268 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
269 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
288 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D6x16inc-minmax-neonfp16arith-ld64.c210 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
214 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
215 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
216 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
217 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
218 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
219 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
234 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
235 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
236 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D4x16inc-minmax-neonfp16arith-ld64.c162 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
166 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
167 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
168 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
169 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
180 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
181 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
182 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
183 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x8inc-minmax-neonfp16arith-ld64.c80 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64() local
83 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
87 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
/external/XNNPACK/src/f32-gemm/gen-inc/
D4x16s4inc-minmax-fma3-broadcast.c135 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
138 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
139 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
140 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
141 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
D4x16s4-minmax-fma3-broadcast.c133 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
136 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
137 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
138 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
139 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
D5x16s4-minmax-fma3-broadcast.c152 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
155 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
156 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
157 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
158 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
159 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c3, vacc4x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
D5x16s4-minmax-fma3-broadcast.c177 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
180 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
181 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
182 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
183 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
184 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c3, vacc4x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()

123