Searched refs:vacc4x01234567 (Results 1 – 25 of 34) sorted by relevance

/external/XNNPACK/src/f16-gemm/gen/
6x8-neonfp16arith-ld64.c
77 float16x8_t vacc4x01234567 = vacc0x01234567; in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64() local
96 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
110 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4c0, vb01234567c0); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
120 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c1, va4, 1); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
134 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4c1, vb01234567c1); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
144 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
158 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4c2, vb01234567c2); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
168 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
182 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4c3, vb01234567c3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
203 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4, vb01234567); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
[all …]
8x8-neonfp16arith-ld64.c
89 float16x8_t vacc4x01234567 = vacc0x01234567; in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64() local
112 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
130 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4c0, vb01234567c0); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
142 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c1, va4, 1); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
160 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4c1, vb01234567c1); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
172 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c2, va4, 2); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
190 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4c2, vb01234567c2); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
202 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
220 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4c3, vb01234567c3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
245 vacc4x01234567 = vfmaq_f16(vacc4x01234567, va4, vb01234567); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
[all …]
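Both fp16 kernels above follow the same per-row accumulator pattern: vacc4x01234567 starts as a copy of vacc0x01234567 and is then updated with lane-broadcast fused multiply-adds over the reduction dimension. A minimal sketch of that inner step, assuming NEON fp16 arithmetic support; the pointer names, loop bounds, and packing layout here are illustrative, not the XNNPACK kernel itself:

#include <arm_neon.h>
#include <stddef.h>

#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
// Accumulate 4 reduction steps into one 8-wide fp16 accumulator (row 4 of the
// tile), broadcasting one activation lane per step -- the vfmaq_lane_f16 form
// seen in the hits above.  kc is assumed to be a multiple of 4 in this sketch.
static void accumulate_row4_f16(float16x8_t* acc4, const float16_t* a4,
                                const float16_t* w, size_t kc) {
  for (size_t k = 0; k < kc; k += 4) {
    const float16x4_t va4 = vld1_f16(a4 + k);             // 4 activations of row 4
    const float16x8_t vb_c0 = vld1q_f16(w + (k + 0) * 8); // 8 packed weights, step k+0
    const float16x8_t vb_c1 = vld1q_f16(w + (k + 1) * 8);
    const float16x8_t vb_c2 = vld1q_f16(w + (k + 2) * 8);
    const float16x8_t vb_c3 = vld1q_f16(w + (k + 3) * 8);
    *acc4 = vfmaq_lane_f16(*acc4, vb_c0, va4, 0);
    *acc4 = vfmaq_lane_f16(*acc4, vb_c1, va4, 1);
    *acc4 = vfmaq_lane_f16(*acc4, vb_c2, va4, 2);
    *acc4 = vfmaq_lane_f16(*acc4, vb_c3, va4, 3);
  }
}
#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC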
/external/XNNPACK/src/f32-gemm/gen-inc/
5x16s4-fma3-broadcast.c
76 __m256 vacc4x01234567 = _mm256_load_ps(acc + 64); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast() local
101 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c0, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
121 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c1, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
141 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c2, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
161 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c3, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
193 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
209 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
221 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
229 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
254 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
[all …]
5x16-avx-broadcast.c
76 __m256 vacc4x01234567 = _mm256_load_ps(acc + 64); in xnn_f32_gemminc_ukernel_5x16__avx_broadcast() local
101 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_gemminc_ukernel_5x16__avx_broadcast()
116 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemminc_ukernel_5x16__avx_broadcast()
128 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemminc_ukernel_5x16__avx_broadcast()
136 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16__avx_broadcast()
161 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16__avx_broadcast()
167 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemminc_ukernel_5x16__avx_broadcast()
179 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16__avx_broadcast()
191 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemminc_ukernel_5x16__avx_broadcast()
5x16-fma3-broadcast.c
76 __m256 vacc4x01234567 = _mm256_load_ps(acc + 64); in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast() local
101 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast()
116 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast()
128 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast()
136 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast()
161 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast()
167 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast()
179 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast()
191 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemminc_ukernel_5x16__fma3_broadcast()
5x8-avx-broadcast.c
72 __m256 vacc4x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_ukernel_5x8__avx_broadcast() local
95 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_gemminc_ukernel_5x8__avx_broadcast()
105 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemminc_ukernel_5x8__avx_broadcast()
112 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemminc_ukernel_5x8__avx_broadcast()
115 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x8__avx_broadcast()
134 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemminc_ukernel_5x8__avx_broadcast()
146 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemminc_ukernel_5x8__avx_broadcast()
5x8-fma3-broadcast.c
72 __m256 vacc4x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_ukernel_5x8__fma3_broadcast() local
95 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x8__fma3_broadcast()
105 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemminc_ukernel_5x8__fma3_broadcast()
112 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemminc_ukernel_5x8__fma3_broadcast()
115 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x8__fma3_broadcast()
134 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemminc_ukernel_5x8__fma3_broadcast()
146 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemminc_ukernel_5x8__fma3_broadcast()
6x8-fma3-broadcast.c
78 __m256 vacc4x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_ukernel_6x8__fma3_broadcast() local
104 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemminc_ukernel_6x8__fma3_broadcast()
115 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemminc_ukernel_6x8__fma3_broadcast()
123 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemminc_ukernel_6x8__fma3_broadcast()
129 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_6x8__fma3_broadcast()
150 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemminc_ukernel_6x8__fma3_broadcast()
164 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemminc_ukernel_6x8__fma3_broadcast()
6x8-avx-broadcast.c
78 __m256 vacc4x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_ukernel_6x8__avx_broadcast() local
104 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_gemminc_ukernel_6x8__avx_broadcast()
115 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemminc_ukernel_6x8__avx_broadcast()
123 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemminc_ukernel_6x8__avx_broadcast()
129 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemminc_ukernel_6x8__avx_broadcast()
150 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemminc_ukernel_6x8__avx_broadcast()
164 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemminc_ukernel_6x8__avx_broadcast()
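In these gen-inc (gemminc) kernels, the difference visible in the hits is where the accumulator starts: vacc4x01234567 is loaded from a caller-supplied acc buffer instead of being copied from vacc0x01234567, and is then accumulated, clamped, and stored. A minimal sketch of that row-4 flow for the FMA3 variants, with illustrative function and parameter names (not the XNNPACK signature):

#include <immintrin.h>
#include <stddef.h>

// Sketch of the row-4 flow in the fma3 gemm-inc hits: resume from a partial
// accumulator, fold in one broadcast multiply-add per reduction step, clamp
// to [min, max], and store 8 output columns.  Compile with -mavx -mfma.
static void row4_gemminc_sketch(const float* acc4, const float* a4,
                                const float* w, size_t kc, float* c4,
                                float min, float max) {
  __m256 vacc4x01234567 = _mm256_loadu_ps(acc4);          // partial results from a previous pass
  for (size_t k = 0; k < kc; k++) {
    const __m256 va4 = _mm256_broadcast_ss(a4 + k);       // broadcast one activation of row 4
    const __m256 vb01234567 = _mm256_loadu_ps(w + k * 8); // 8 packed weights for this step
    vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567);
  }
  vacc4x01234567 = _mm256_min_ps(vacc4x01234567, _mm256_set1_ps(max)); // upper clamp first, as above
  vacc4x01234567 = _mm256_max_ps(vacc4x01234567, _mm256_set1_ps(min)); // then lower clamp
  _mm256_storeu_ps(c4, vacc4x01234567);
}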
/external/XNNPACK/src/f32-gemm/gen/
5x16s4-fma3-broadcast.c
74 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast() local
99 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c0, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
119 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c1, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
139 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c2, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
159 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c3, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
191 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
207 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
219 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
227 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
252 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
[all …]
5x8-fma3-broadcast.c
70 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_gemm_ukernel_5x8__fma3_broadcast() local
93 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemm_ukernel_5x8__fma3_broadcast()
103 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemm_ukernel_5x8__fma3_broadcast()
110 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemm_ukernel_5x8__fma3_broadcast()
113 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_5x8__fma3_broadcast()
132 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemm_ukernel_5x8__fma3_broadcast()
144 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemm_ukernel_5x8__fma3_broadcast()
5x16-avx-broadcast.c
74 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_gemm_ukernel_5x16__avx_broadcast() local
99 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_gemm_ukernel_5x16__avx_broadcast()
114 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemm_ukernel_5x16__avx_broadcast()
126 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemm_ukernel_5x16__avx_broadcast()
134 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16__avx_broadcast()
159 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16__avx_broadcast()
165 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemm_ukernel_5x16__avx_broadcast()
177 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemm_ukernel_5x16__avx_broadcast()
189 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemm_ukernel_5x16__avx_broadcast()
5x16-fma3-broadcast.c
74 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_gemm_ukernel_5x16__fma3_broadcast() local
99 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16__fma3_broadcast()
114 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemm_ukernel_5x16__fma3_broadcast()
126 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemm_ukernel_5x16__fma3_broadcast()
134 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16__fma3_broadcast()
159 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16__fma3_broadcast()
165 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemm_ukernel_5x16__fma3_broadcast()
177 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemm_ukernel_5x16__fma3_broadcast()
189 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemm_ukernel_5x16__fma3_broadcast()
5x8-avx-broadcast.c
70 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_gemm_ukernel_5x8__avx_broadcast() local
93 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_gemm_ukernel_5x8__avx_broadcast()
103 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemm_ukernel_5x8__avx_broadcast()
110 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemm_ukernel_5x8__avx_broadcast()
113 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_5x8__avx_broadcast()
132 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemm_ukernel_5x8__avx_broadcast()
144 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemm_ukernel_5x8__avx_broadcast()
6x8-avx-broadcast.c
76 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_gemm_ukernel_6x8__avx_broadcast() local
102 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_gemm_ukernel_6x8__avx_broadcast()
113 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemm_ukernel_6x8__avx_broadcast()
121 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemm_ukernel_6x8__avx_broadcast()
127 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_6x8__avx_broadcast()
148 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemm_ukernel_6x8__avx_broadcast()
162 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemm_ukernel_6x8__avx_broadcast()
6x8-fma3-broadcast.c
76 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_gemm_ukernel_6x8__fma3_broadcast() local
102 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_gemm_ukernel_6x8__fma3_broadcast()
113 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_gemm_ukernel_6x8__fma3_broadcast()
121 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_gemm_ukernel_6x8__fma3_broadcast()
127 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_gemm_ukernel_6x8__fma3_broadcast()
148 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_gemm_ukernel_6x8__fma3_broadcast()
162 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_gemm_ukernel_6x8__fma3_broadcast()
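The -avx- and -fma3- variants listed above differ only in how each product is folded into vacc4x01234567: the AVX kernels use a separate multiply and add, while the FMA3 kernels use a single fused multiply-add (one rounding instead of two). A minimal sketch of the two forms, with illustrative helper names:

#include <immintrin.h>

// The two accumulation forms visible in the hits above.
static inline __m256 acc_avx(__m256 acc, __m256 a, __m256 b) {
  return _mm256_add_ps(acc, _mm256_mul_ps(a, b));  // *-avx-broadcast kernels: mul + add
}

static inline __m256 acc_fma3(__m256 acc, __m256 a, __m256 b) {
  return _mm256_fmadd_ps(a, b, acc);               // *-fma3-broadcast kernels: fused
}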
/external/XNNPACK/src/f32-igemm/gen/
5x16s4-fma3-broadcast.c
70 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast() local
124 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c0, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
144 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c1, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
164 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c2, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
184 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c3, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
216 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
234 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
246 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
254 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
274 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
[all …]
5x16-fma3-broadcast.c
70 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_5x16__fma3_broadcast() local
128 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16__fma3_broadcast()
140 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_5x16__fma3_broadcast()
152 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_5x16__fma3_broadcast()
160 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16__fma3_broadcast()
180 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16__fma3_broadcast()
186 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_igemm_ukernel_5x16__fma3_broadcast()
198 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_igemm_ukernel_5x16__fma3_broadcast()
210 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_igemm_ukernel_5x16__fma3_broadcast()
5x16-avx-broadcast.c
70 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_5x16__avx_broadcast() local
128 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_igemm_ukernel_5x16__avx_broadcast()
140 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_5x16__avx_broadcast()
152 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_5x16__avx_broadcast()
160 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16__avx_broadcast()
180 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16__avx_broadcast()
186 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_igemm_ukernel_5x16__avx_broadcast()
198 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_igemm_ukernel_5x16__avx_broadcast()
210 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_igemm_ukernel_5x16__avx_broadcast()
5x8-fma3-broadcast.c
66 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_5x8__fma3_broadcast() local
118 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_igemm_ukernel_5x8__fma3_broadcast()
129 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_5x8__fma3_broadcast()
136 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_5x8__fma3_broadcast()
139 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_5x8__fma3_broadcast()
153 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_igemm_ukernel_5x8__fma3_broadcast()
165 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_igemm_ukernel_5x8__fma3_broadcast()
5x8-avx-broadcast.c
66 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_5x8__avx_broadcast() local
118 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_igemm_ukernel_5x8__avx_broadcast()
129 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_5x8__avx_broadcast()
136 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_5x8__avx_broadcast()
139 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_5x8__avx_broadcast()
153 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_igemm_ukernel_5x8__avx_broadcast()
165 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_igemm_ukernel_5x8__avx_broadcast()
6x8-fma3-broadcast.c
70 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_6x8__fma3_broadcast() local
130 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_igemm_ukernel_6x8__fma3_broadcast()
142 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_6x8__fma3_broadcast()
150 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_6x8__fma3_broadcast()
156 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_6x8__fma3_broadcast()
171 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_igemm_ukernel_6x8__fma3_broadcast()
185 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_igemm_ukernel_6x8__fma3_broadcast()
6x8-avx-broadcast.c
70 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_6x8__avx_broadcast() local
130 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_igemm_ukernel_6x8__avx_broadcast()
142 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_6x8__avx_broadcast()
150 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_6x8__avx_broadcast()
156 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_6x8__avx_broadcast()
171 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_igemm_ukernel_6x8__avx_broadcast()
185 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_igemm_ukernel_6x8__avx_broadcast()
7x8-fma3-broadcast.c
74 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_7x8__fma3_broadcast() local
142 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567, vacc4x01234567); in xnn_f32_igemm_ukernel_7x8__fma3_broadcast()
155 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_7x8__fma3_broadcast()
164 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_7x8__fma3_broadcast()
173 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_7x8__fma3_broadcast()
189 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_igemm_ukernel_7x8__fma3_broadcast()
205 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_igemm_ukernel_7x8__fma3_broadcast()
7x8-avx-broadcast.c
74 __m256 vacc4x01234567 = vacc0x01234567; in xnn_f32_igemm_ukernel_7x8__avx_broadcast() local
142 vacc4x01234567 = _mm256_add_ps(vacc4x01234567, _mm256_mul_ps(va4, vb01234567)); in xnn_f32_igemm_ukernel_7x8__avx_broadcast()
155 vacc4x01234567 = _mm256_min_ps(vacc4x01234567, vmax); in xnn_f32_igemm_ukernel_7x8__avx_broadcast()
164 vacc4x01234567 = _mm256_max_ps(vacc4x01234567, vmin); in xnn_f32_igemm_ukernel_7x8__avx_broadcast()
173 _mm256_storeu_ps(c4, vacc4x01234567); in xnn_f32_igemm_ukernel_7x8__avx_broadcast()
189 __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567); in xnn_f32_igemm_ukernel_7x8__avx_broadcast()
205 vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1); in xnn_f32_igemm_ukernel_7x8__avx_broadcast()
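Across all of these f32 kernels, the final hits show how partial output rows are stored when fewer than 8 columns remain: the 256-bit accumulator is split with _mm256_castps256_ps128 and _mm256_extractf128_ps and the store is narrowed step by step (the 16-wide kernels also shift vacc4x89ABCDEF into vacc4x01234567 once the first 8 columns are written). A minimal sketch of that tail for row 4, assuming nc < 8; the helper name is illustrative, and the 2- and 1-column steps are filled in beyond what the truncated listing shows:

#include <immintrin.h>
#include <stddef.h>

// Narrowing store for the last nc (< 8) columns of row 4, mirroring the
// castps256/extractf128 pattern in the hits above.  The 2- and 1-column
// branches are an assumption about the elided tail code.
static void store_row4_tail(float* c4, __m256 vacc4x01234567, size_t nc) {
  __m128 vacc4x0123 = _mm256_castps256_ps128(vacc4x01234567);   // low 4 columns
  if (nc & 4) {
    _mm_storeu_ps(c4, vacc4x0123);
    vacc4x0123 = _mm256_extractf128_ps(vacc4x01234567, 1);      // switch to the high 4 columns
    c4 += 4;
  }
  if (nc & 2) {
    _mm_storel_pi((__m64*) c4, vacc4x0123);                     // store 2 floats
    vacc4x0123 = _mm_movehl_ps(vacc4x0123, vacc4x0123);         // move remaining pair to the low half
    c4 += 2;
  }
  if (nc & 1) {
    _mm_store_ss(c4, vacc4x0123);                               // store the last column
  }
}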
