Searched refs:vb01234567c2 (Results 1 – 25 of 70) sorted by relevance

/external/XNNPACK/src/f16-gemm/gen/
8x8-minmax-neonfp16arith-ld64.c
167 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
170 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
171 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
172 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
173 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
174 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
175 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
176 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
177 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
188 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8-minmax-neonfp16arith-ld64.c
139 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
142 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
143 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
144 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
145 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
146 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
147 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
156 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
157 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
158 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8-minmax-neonfp16arith-ld64.c
111 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
114 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
115 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
116 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
117 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
124 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
125 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
126 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
127 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
6x16-minmax-neonfp16arith-ld64.c
171 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
175 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
176 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
177 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
178 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
179 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
180 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
195 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
196 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
197 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
8x16-minmax-neonfp16arith-ld64.c
209 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
213 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
214 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
215 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
216 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
217 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
218 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
219 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
220 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
239 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
4x16-minmax-neonfp16arith-ld64.c
133 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
137 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
138 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
139 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
140 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
151 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
152 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
153 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
154 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
1x8-minmax-neonfp16arith-ld64.c
69 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
72 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
76 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
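Every f16-gemm hit above follows the same per-k-step pattern: load eight packed fp16 weights, advance the weight pointer, then fuse-multiply-accumulate those weights against one lane of each row's activation vector. A minimal sketch of that step, assuming an ARMv8.2-A target with the fp16 arithmetic extension (the helper name and signature are illustrative, not the XNNPACK API):

#include <arm_neon.h>
#include <stdint.h>

// One k-step (lane 2) of a single-row f16 GEMM microkernel, mirroring the
// matched lines: load the next 8 packed weights, advance w, and accumulate
// weights * lane 2 of the activation vector into the accumulator.
static float16x8_t gemm_step_c2(float16x8_t vacc, const void **w, float16x4_t va)
{
  const float16x8_t vb01234567c2 = vld1q_f16((const float16_t*) *w);
  *w = (const void*) ((uintptr_t) *w + sizeof(float16x8_t));
  return vfmaq_lane_f16(vacc, vb01234567c2, va, 2);
}

The 1x8/4x8/6x8/8x8/4x16/6x16/8x16 variants differ only in how many accumulator rows (vacc0x… through vacc7x…) this step is unrolled across.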
/external/XNNPACK/src/f16-igemm/gen/
8x8-minmax-neonfp16arith-ld64.c
199 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
202 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
203 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
204 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
205 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
206 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
207 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
208 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
209 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
220 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8-minmax-neonfp16arith-ld64.c
165 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
168 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
169 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
170 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
171 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
172 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
173 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
182 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
183 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
184 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8-minmax-neonfp16arith-ld64.c
131 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
134 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
135 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
136 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
137 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
144 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
145 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
8x16-minmax-neonfp16arith-ld64.c
241 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
245 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
246 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
247 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
248 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
249 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
250 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
251 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
252 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
271 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
6x16-minmax-neonfp16arith-ld64.c
197 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
201 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
202 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
203 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
204 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
205 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
206 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
221 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
222 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
223 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
4x16-minmax-neonfp16arith-ld64.c
153 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
157 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
158 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
159 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
160 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
171 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
172 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
173 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
174 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
1x8-minmax-neonfp16arith-ld64.c
80 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
83 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
87 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
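Both the gemm and igemm hits also show a second form: a plain vfmaq_f16 against va0c2-style temporaries. It computes the same product as vfmaq_lane_f16, but broadcasts lane 2 to a full vector first. A sketch under the assumption that the va0c2 temporaries come from vdupq_lane_f16 (the variable name is taken from the hits; the helper is hypothetical):

#include <arm_neon.h>

// Same math as vfmaq_lane_f16, written as an explicit broadcast plus a
// plain vector FMA, matching the vacc += va0c2 * vb01234567c2 lines above.
static float16x8_t gemm_step_c2_dup(float16x8_t vacc, float16x8_t vb01234567c2, float16x4_t va0)
{
  const float16x8_t va0c2 = vdupq_lane_f16(va0, 2);  // broadcast lane 2 (assumption)
  return vfmaq_f16(vacc, va0c2, vb01234567c2);       // vacc += va0c2 * vb01234567c2
}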
/external/XNNPACK/src/f16-gemm/gen-inc/
8x8inc-minmax-neonfp16arith-ld64.c
169 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local
172 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
173 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
174 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
175 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
176 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
177 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
178 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
179 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
190 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8inc-minmax-neonfp16arith-ld64.c
141 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64() local
144 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
145 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
148 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
149 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
158 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
159 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
160 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8inc-minmax-neonfp16arith-ld64.c
113 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64() local
116 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
117 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
118 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
119 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
126 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
127 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
128 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
129 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
8x16inc-minmax-neonfp16arith-ld64.c
211 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local
215 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
216 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
217 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
218 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
219 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
220 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
221 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
222 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
241 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
6x16inc-minmax-neonfp16arith-ld64.c
173 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
177 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
178 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
179 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
180 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
181 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
182 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
197 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
198 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
199 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
4x16inc-minmax-neonfp16arith-ld64.c
135 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
139 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
140 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c2, va1, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
141 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
142 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
153 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
154 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
155 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
156 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
1x8inc-minmax-neonfp16arith-ld64.c
71 …const float16x8_t vb01234567c2 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64() local
74 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
78 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
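The gen-inc (gemminc) hits are line-for-line the same inner loop as the gen kernels; what differs is accumulator initialization. A hedged sketch of that difference, assuming the usual XNNPACK layout where a plain gemm kernel seeds each accumulator from the bias packed at the head of w, while the inc variant resumes from a caller-supplied partial-sum buffer (names illustrative):

#include <arm_neon.h>
#include <stdint.h>

// gemm: seed the accumulator from the bias packed at the head of w
// (assumed packing; the searched lines only show the weight loads).
static float16x8_t init_acc_gemm(const void **w)
{
  const float16x8_t vacc = vld1q_f16((const float16_t*) *w);
  *w = (const void*) ((uintptr_t) *w + sizeof(float16x8_t));
  return vacc;
}

// gemminc: seed the accumulator from partial results of an earlier pass.
static float16x8_t init_acc_gemminc(const float16_t *acc)
{
  return vld1q_f16(acc);
}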
/external/XNNPACK/src/f32-gemm/gen-inc/
4x16s4inc-minmax-fma3-broadcast.c
118 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
121 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
122 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
123 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
124 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
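The f32 fma3 hits use the same structure with AVX+FMA3 intrinsics: the c2 block of packed weights sits 32 floats into the current weight row, and one FMA accumulates it against an already-broadcast activation register. A minimal sketch (helper name illustrative; the aligned _mm256_load_ps assumes w is 32-byte aligned, which the packed-weight layout appears to guarantee):

#include <immintrin.h>

// One c2 step of the fma3 kernels above: load 8 packed f32 weights at
// w + 32 and accumulate va * vb into the accumulator.
static __m256 gemm_step_c2_fma3(__m256 vacc, const float *w, __m256 va)
{
  const __m256 vb01234567c2 = _mm256_load_ps(w + 32);  // aligned load (assumption)
  return _mm256_fmadd_ps(va, vb01234567c2, vacc);      // vacc += va * vb01234567c2
}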
/external/XNNPACK/src/f32-gemm/gen/
4x16s4-minmax-fma3-broadcast.c
116 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
119 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
120 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
121 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
122 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
5x16s4-minmax-fma3-broadcast.c
132 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
135 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
136 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
137 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
138 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
139 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c2, vacc4x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
5x16s4-minmax-fma3-broadcast.c
157 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
160 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
161 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c2, vacc1x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
162 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
163 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
164 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c2, vacc4x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
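The "s4" in these f32 kernel names denotes a shift-by-four schedule: one activation load feeds four consecutive FMA steps (c0 through c3), with the activation lanes rotated between steps, so c2 is the third rotation rather than a fresh broadcast. A sketch of the rotation, assuming the generated kernels use _mm256_permute_ps for it (a hedged reading; the rotation itself is not among the matched lines):

#include <immintrin.h>

// Rotate the four floats within each 128-bit half: [a b c d] -> [b c d a].
static __m256 s4_rotate(__m256 va)
{
  return _mm256_permute_ps(va, _MM_SHUFFLE(0, 3, 2, 1));
}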
