
Searched refs:vb01234567c2 (Results 1 – 25 of 213) sorted by relevance


/external/XNNPACK/src/f16-gemm/gen-inc/
8x8inc-minmax-neonfp16arith-ld64.c:169 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local
172 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
173 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
174 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
175 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
176 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
177 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
178 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
179 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
190 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8inc-minmax-neonfp16arith-ld64.c:141 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64() local
144 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
145 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
148 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
149 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
158 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
159 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
160 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8inc-minmax-neonfp16arith-ld64.c:113 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64() local
116 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
117 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
118 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
119 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
126 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
127 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
128 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
129 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
6x16inc-minmax-neonfp16arith-ld64.c:173 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
177 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
178 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
179 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
180 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
181 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
182 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
197 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
198 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
199 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
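
A note on the NEON pattern behind these f16 hits, as a minimal sketch rather than XNNPACK's exact code: vb01234567c2 is the vector holding weight columns 0-7 for sub-step k+2 of a four-element K block, and "ld64" means each A row is loaded 64 bits (four half-precision lanes) at a time. The wrapper functions and pointer offsets below are illustrative assumptions; building them requires ARMv8.2-A FP16 arithmetic (e.g. -march=armv8.2-a+fp16).

#include <arm_neon.h>

/* Lane form seen in the hits: FMA the B vector against lane 2 of the
   four loaded A values, accumulating into one row of the output tile. */
void f16_kblock_lane_form(const float16_t* a0, const float16_t* w,
                          float16x8_t* vacc0x01234567) {
  const float16x4_t va0 = vld1_f16(a0);               /* 4 K elements of A row 0 */
  const float16x8_t vb01234567c2 = vld1q_f16(w + 16); /* third B vector of the block */
  *vacc0x01234567 = vfmaq_lane_f16(*vacc0x01234567, vb01234567c2, va0, 2);
}

/* Broadcast form also seen in the hits: duplicate lane 2 across a full
   vector (va0c2), then use a plain vector FMA. */
void f16_kblock_broadcast_form(const float16_t* a0, const float16_t* w,
                               float16x8_t* vacc0x01234567) {
  const float16x4_t va0 = vld1_f16(a0);
  const float16x8_t va0c2 = vdupq_lane_f16(va0, 2);
  const float16x8_t vb01234567c2 = vld1q_f16(w + 16);
  *vacc0x01234567 = vfmaq_f16(*vacc0x01234567, va0c2, vb01234567c2);
}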
/external/XNNPACK/src/f16-igemm/gen/
8x8-minmax-neonfp16arith-ld64.c:199 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
202 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
203 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
204 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
205 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
206 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
207 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
208 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
209 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
220 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8-minmax-neonfp16arith-ld64.c:165 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
168 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
169 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
170 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
171 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
172 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
173 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
182 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
183 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
184 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8-minmax-neonfp16arith-ld64.c:131 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
134 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
135 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
136 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
137 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
144 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
145 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
8x16-minmax-neonfp16arith-ld64.c:241 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
245 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
246 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
247 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
248 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
249 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
250 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
251 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
252 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
271 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
6x16-minmax-neonfp16arith-ld64.c:197 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
201 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
202 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
203 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
204 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
205 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
206 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
221 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
222 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
223 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen/
8x8-minmax-neonfp16arith-ld64.c:167 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
170 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
171 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
172 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
173 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
174 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
175 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
176 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
177 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
188 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8-minmax-neonfp16arith-ld64.c:139 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
142 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
143 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
144 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
145 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
146 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
147 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
156 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
157 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
158 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8-minmax-neonfp16arith-ld64.c:111 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
114 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
115 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
116 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
117 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
124 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
125 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
126 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
127 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
8x16-minmax-neonfp16arith-ld64.c:209 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
213 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
214 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
215 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
216 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
217 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
218 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
219 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
220 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
239 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
6x16-minmax-neonfp16arith-ld64.c:171 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
175 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
176 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
177 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
178 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
179 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
180 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
195 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
196 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
197 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
5x16s4-minmax-fma3-broadcast.c:132 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
135 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
136 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
137 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
138 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
139 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c2, vacc4x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
224 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
227 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
228 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
229 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
[all …]
4x16s4-minmax-fma3-broadcast.c:116 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
119 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
120 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
121 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
122 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
195 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
198 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
199 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
200 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
201 … = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
3x16s4-minmax-fma3-broadcast.c:100 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() local
103 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
104 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
105 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
166 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() local
169 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
170 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
171 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
1x16s4-minmax-fma3-broadcast.c:68 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
71 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
108 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
111 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
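
The f32 FMA3 "s4" hits above show two recurring forms, sketched minimally here (the wrapper functions and their signatures are illustrative assumptions, not XNNPACK's). In the main loop, vb01234567c2 is simply the third aligned 8-float weight vector of a four-step K block (offset w + 32) fed to a fused multiply-add. In the remainder path, A lanes are first masked to zero wherever the corresponding B lane is zero, so zero-padded weight entries contribute exactly nothing; presumably this also keeps stray Inf/NaN values in unused A lanes out of the accumulator.

#include <immintrin.h>

/* Main-loop form: one aligned weight load per sub-step plus an FMA.
   Compile with FMA3 support (e.g. -mavx -mfma). */
void f32_s4_step(const float* w, __m256 va0, __m256* vacc0x01234567) {
  const __m256 vb01234567c2 = _mm256_load_ps(w + 32);
  *vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, *vacc0x01234567);
}

/* Remainder form: keep va0 lanes only where vb01234567c2 is nonzero,
   making zero-padded weight lanes exact no-ops. */
void f32_s4_remainder_step(const float* w, __m256 va0,
                           __m256* vacc0x01234567) {
  const __m256 vzero = _mm256_setzero_ps();
  const __m256 vb01234567c2 = _mm256_load_ps(w + 32);
  const __m256 vnonzero = _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ);
  *vacc0x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va0, vnonzero),
                                    vb01234567c2, *vacc0x01234567);
}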
/external/XNNPACK/src/f32-gemm/gen-inc/
5x16s4inc-minmax-fma3-broadcast.c:134 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local
137 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
138 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
139 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
140 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
141 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c2, vacc4x01234567); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
226 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local
229 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
230 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
231 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
[all …]
4x16s4inc-minmax-fma3-broadcast.c:118 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
121 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
122 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
123 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
124 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
197 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
200 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
201 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
202 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
203 … = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
3x16s4inc-minmax-fma3-broadcast.c:102 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() local
105 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
106 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
107 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
168 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() local
171 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
172 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
173 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
1x16s4inc-minmax-fma3-broadcast.c:70 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast() local
73 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
110 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast() local
113 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
5x16s4-minmax-fma3-broadcast.c:157 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
160 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
161 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
162 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
163 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
164 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c2, vacc4x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
249 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
252 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
253 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
254 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
[all …]
4x16s4-minmax-fma3-broadcast.c:138 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
141 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
142 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
143 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
144 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
217 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
220 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
221 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
222 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
223 … = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
3x16s4-minmax-fma3-broadcast.c:119 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() local
122 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
123 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
124 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
185 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() local
188 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
189 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
190 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ)), vb01234567… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
