Home
last modified time | relevance | path

Searched refs:vacc2x89ABCDEF (Results 1 – 25 of 70) sorted by relevance

123

/external/XNNPACK/src/f16-igemm/gen/
D4x16-minmax-neonfp16arith-ld64.c64 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
109 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
123 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
136 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
150 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
163 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
177 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
190 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
204 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
224 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c72 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
135 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
155 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
172 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
192 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
209 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
229 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
246 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
266 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
292 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen-inc/
D4x16inc-minmax-neonfp16arith-ld64.c70 …float16x8_t vacc2x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
91 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
105 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
118 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
132 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
145 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
159 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
172 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
186 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
208 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x16inc-minmax-neonfp16arith-ld64.c82 …float16x8_t vacc2x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
111 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
131 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
148 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
168 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
185 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
205 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
222 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
242 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
270 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen/
D4x16-minmax-neonfp16arith-ld64.c68 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
89 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
103 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
116 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
130 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
143 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
157 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
170 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
184 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
206 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c80 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
109 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
129 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
146 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
166 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
183 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
203 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
220 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
240 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
268 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f32-igemm/gen/
D3x16s4-minmax-fma3-broadcast.c59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() local
99 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
113 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
127 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
141 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
165 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
179 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
187 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
191 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
208 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
D4x16s4-minmax-fma3-broadcast.c63 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
113 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
130 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
147 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
164 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
192 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
208 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
218 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
226 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
245 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
D5x16s4-minmax-fma3-broadcast.c67 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
127 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
147 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
167 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
187 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
219 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
237 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
249 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
261 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
282 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
D3x16-minmax-fma3-broadcast.c59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast() local
99 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
111 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
119 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
123 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
140 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
D3x16-minmax-avx-broadcast.c59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast() local
99 vacc2x89ABCDEF = _mm256_add_ps(vacc2x89ABCDEF, _mm256_mul_ps(va2, vb89ABCDEF)); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
111 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
119 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
123 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
140 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
/external/XNNPACK/src/f32-gemm/gen-inc/
D3x16s4inc-minmax-fma3-broadcast.c61 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() local
82 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
96 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
110 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
124 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
148 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
160 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
168 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
172 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
192 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
D4x16s4inc-minmax-fma3-broadcast.c67 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
93 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
110 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
127 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
144 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
172 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
186 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
196 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
204 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
227 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
D3x16inc-minmax-fma3-broadcast.c61 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast() local
82 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
93 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
101 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
105 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
125 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
D3x16inc-minmax-avx-broadcast.c61 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast() local
82 vacc2x89ABCDEF = _mm256_add_ps(vacc2x89ABCDEF, _mm256_mul_ps(va2, vb89ABCDEF)); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
93 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
101 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
105 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
125 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
D5x16s4inc-minmax-fma3-broadcast.c73 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local
104 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
124 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
144 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
164 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
196 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
212 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
224 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
236 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
262 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
D4x16inc-minmax-fma3-broadcast.c67 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast() local
93 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
106 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
116 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
124 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
147 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
D4x16inc-minmax-avx-broadcast.c67 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast() local
93 vacc2x89ABCDEF = _mm256_add_ps(vacc2x89ABCDEF, _mm256_mul_ps(va2, vb89ABCDEF)); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
106 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
116 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
124 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
147 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
D3x16s4-minmax-fma3-broadcast.c59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() local
80 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
94 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
108 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
122 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
146 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
158 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
166 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
170 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
190 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
D4x16s4-minmax-fma3-broadcast.c65 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
91 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
108 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
125 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
142 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
170 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
184 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
194 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
202 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
225 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
D3x16-minmax-avx-broadcast.c59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast() local
80 vacc2x89ABCDEF = _mm256_add_ps(vacc2x89ABCDEF, _mm256_mul_ps(va2, vb89ABCDEF)); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
91 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
99 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
103 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
123 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
D3x16-minmax-fma3-broadcast.c59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast() local
80 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
91 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
99 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
103 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
123 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
D5x16s4-minmax-fma3-broadcast.c71 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
102 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
122 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
142 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
162 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
194 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
210 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
222 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
234 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
260 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
D4x16-minmax-avx-broadcast.c65 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast() local
91 vacc2x89ABCDEF = _mm256_add_ps(vacc2x89ABCDEF, _mm256_mul_ps(va2, vb89ABCDEF)); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
104 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
114 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
122 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
145 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
D4x16-minmax-fma3-broadcast.c65 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast() local
91 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
104 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
114 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
122 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
145 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()

123