Home
last modified time | relevance | path

Searched refs:vacc3x89ABCDEF (Results 1 – 25 of 47) sorted by relevance

12

/external/XNNPACK/src/f16-igemm/gen/
D4x16-minmax-neonfp16arith-ld64.c66 float16x8_t vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
110 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
124 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
137 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
151 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
164 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
178 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
191 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
205 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
225 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c74 float16x8_t vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
136 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
156 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
173 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
193 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
210 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
230 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
247 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
267 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
293 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16-minmax-neonfp16arith-ld64.c82 float16x8_t vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
162 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
188 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
209 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
235 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
256 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
282 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
303 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
329 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
361 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen-inc/
D4x16inc-minmax-neonfp16arith-ld64.c72 …float16x8_t vacc3x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
92 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
106 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
119 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
133 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
146 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
160 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
173 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
187 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
209 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x16inc-minmax-neonfp16arith-ld64.c84 …float16x8_t vacc3x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
112 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
132 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
149 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
169 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
186 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
206 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
223 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
243 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
271 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16inc-minmax-neonfp16arith-ld64.c96 …float16x8_t vacc3x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local
132 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
158 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
179 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
205 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
226 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
252 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
273 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
299 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
333 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen/
D4x16-minmax-neonfp16arith-ld64.c70 float16x8_t vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
90 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
104 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
117 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
131 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
144 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
158 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
171 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
185 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
207 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c82 float16x8_t vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
110 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
130 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
147 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
167 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
184 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
204 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
221 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
241 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
269 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16-minmax-neonfp16arith-ld64.c94 float16x8_t vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
130 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
156 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
177 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc1, va3, 1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
203 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
224 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc2, va3, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
250 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
271 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
297 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
331 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f32-gemm/gen-inc/
D4x16s4inc-minmax-fma3-broadcast.c69 __m256 vacc3x89ABCDEF = _mm256_load_ps(acc + 56); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
94 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
111 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc1, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
128 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc2, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
145 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
173 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
187 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
197 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
201 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
226 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
D5x16s4inc-minmax-fma3-broadcast.c75 __m256 vacc3x89ABCDEF = _mm256_load_ps(acc + 56); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local
105 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
125 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc1, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
145 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc2, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
165 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
197 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
213 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
225 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
233 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
261 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
D4x16inc-minmax-fma3-broadcast.c69 __m256 vacc3x89ABCDEF = _mm256_load_ps(acc + 56); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast() local
94 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
107 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
117 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
121 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
146 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
D4x16inc-minmax-avx-broadcast.c69 __m256 vacc3x89ABCDEF = _mm256_load_ps(acc + 56); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast() local
94 vacc3x89ABCDEF = _mm256_add_ps(vacc3x89ABCDEF, _mm256_mul_ps(va3, vb89ABCDEF)); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
107 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
117 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
121 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
146 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
D5x16inc-minmax-avx-broadcast.c75 __m256 vacc3x89ABCDEF = _mm256_load_ps(acc + 56); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast() local
105 vacc3x89ABCDEF = _mm256_add_ps(vacc3x89ABCDEF, _mm256_mul_ps(va3, vb89ABCDEF)); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
120 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
132 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
140 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
168 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
D5x16inc-minmax-fma3-broadcast.c75 __m256 vacc3x89ABCDEF = _mm256_load_ps(acc + 56); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast() local
105 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
120 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
132 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
140 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
168 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
D4x16s4-minmax-fma3-broadcast.c67 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
92 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
109 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc1, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
126 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc2, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
143 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
171 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
185 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
195 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
199 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
224 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
D5x16s4-minmax-fma3-broadcast.c73 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
103 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
123 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc1, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
143 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc2, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
163 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
195 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
211 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
223 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
231 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
259 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
D4x16-minmax-avx-broadcast.c67 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast() local
92 vacc3x89ABCDEF = _mm256_add_ps(vacc3x89ABCDEF, _mm256_mul_ps(va3, vb89ABCDEF)); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
105 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
115 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
119 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
144 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
D4x16-minmax-fma3-broadcast.c67 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast() local
92 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
105 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
115 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
119 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
144 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
D4x16s4-minmax-fma3-broadcast.c65 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
114 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
131 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc1, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
148 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc2, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
165 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
193 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
209 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
219 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
223 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
244 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
D5x16s4-minmax-fma3-broadcast.c69 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
128 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
148 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc1, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
168 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc2, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
188 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
220 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
238 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
250 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
258 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
281 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
D4x16-minmax-avx-broadcast.c65 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast() local
114 vacc3x89ABCDEF = _mm256_add_ps(vacc3x89ABCDEF, _mm256_mul_ps(va3, vb89ABCDEF)); in xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast()
128 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast()
138 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast()
142 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast()
163 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast()
D4x16-minmax-fma3-broadcast.c65 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast() local
114 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast()
128 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast()
138 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast()
142 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast()
163 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast()
D5x16-minmax-avx-broadcast.c69 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast() local
127 vacc3x89ABCDEF = _mm256_add_ps(vacc3x89ABCDEF, _mm256_mul_ps(va3, vb89ABCDEF)); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
144 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
156 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
164 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
187 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
D5x16-minmax-fma3-broadcast.c69 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast() local
127 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEF, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
144 vacc3x89ABCDEF = _mm256_min_ps(vacc3x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
156 vacc3x89ABCDEF = _mm256_max_ps(vacc3x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
164 _mm256_storeu_ps(c3 + 8, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
187 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()

12