Home
last modified time | relevance | path

Searched refs:vacc0x89ABCDEF (Results 1 – 25 of 36) sorted by relevance

12

/external/XNNPACK/src/f32-gemm/gen-inc/
D1x16s4-fma3-broadcast.c45 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() local
58 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
66 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
74 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
82 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
98 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
106 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
110 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
114 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
124 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
D1x16-fma3-broadcast.c45 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_1x16__fma3_broadcast() local
58 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16__fma3_broadcast()
65 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemminc_ukernel_1x16__fma3_broadcast()
69 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemminc_ukernel_1x16__fma3_broadcast()
73 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16__fma3_broadcast()
83 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemminc_ukernel_1x16__fma3_broadcast()
D1x16-avx-broadcast.c45 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_1x16__avx_broadcast() local
58 vacc0x89ABCDEF = _mm256_add_ps(vacc0x89ABCDEF, _mm256_mul_ps(va0, vb89ABCDEF)); in xnn_f32_gemminc_ukernel_1x16__avx_broadcast()
65 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemminc_ukernel_1x16__avx_broadcast()
69 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemminc_ukernel_1x16__avx_broadcast()
73 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_1x16__avx_broadcast()
83 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemminc_ukernel_1x16__avx_broadcast()
D3x16s4-fma3-broadcast.c57 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() local
80 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
94 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
108 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
122 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
146 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
158 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
166 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
178 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
194 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
D4x16s4-fma3-broadcast.c63 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast() local
91 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
108 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
125 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
142 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
170 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
184 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
194 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
210 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
229 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
D1x16s4-fma3-broadcast.c43 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() local
56 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
64 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
72 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
80 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
96 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
104 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
108 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
112 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
122 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
D3x16s4-fma3-broadcast.c55 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast() local
57 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
78 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
92 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
106 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
120 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
144 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
156 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
164 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
[all …]
D1x16-fma3-broadcast.c43 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x16__fma3_broadcast() local
56 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16__fma3_broadcast()
63 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_1x16__fma3_broadcast()
67 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemm_ukernel_1x16__fma3_broadcast()
71 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16__fma3_broadcast()
81 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_1x16__fma3_broadcast()
D1x16-avx-broadcast.c43 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x16__avx_broadcast() local
56 vacc0x89ABCDEF = _mm256_add_ps(vacc0x89ABCDEF, _mm256_mul_ps(va0, vb89ABCDEF)); in xnn_f32_gemm_ukernel_1x16__avx_broadcast()
63 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_1x16__avx_broadcast()
67 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemm_ukernel_1x16__avx_broadcast()
71 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_1x16__avx_broadcast()
81 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_1x16__avx_broadcast()
D4x16s4-fma3-broadcast.c61 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast() local
63 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
65 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
67 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
89 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
106 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
123 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
140 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
168 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
182 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
[all …]
D5x16s4-fma3-broadcast.c67 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast() local
69 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
71 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
73 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
75 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
100 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
120 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
140 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
160 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
192 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
[all …]
D3x16-avx-broadcast.c55 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_3x16__avx_broadcast() local
57 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_3x16__avx_broadcast()
59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_3x16__avx_broadcast()
78 vacc0x89ABCDEF = _mm256_add_ps(vacc0x89ABCDEF, _mm256_mul_ps(va0, vb89ABCDEF)); in xnn_f32_gemm_ukernel_3x16__avx_broadcast()
89 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_3x16__avx_broadcast()
97 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemm_ukernel_3x16__avx_broadcast()
109 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_3x16__avx_broadcast()
125 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_3x16__avx_broadcast()
D3x16-fma3-broadcast.c55 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_3x16__fma3_broadcast() local
57 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_3x16__fma3_broadcast()
59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_3x16__fma3_broadcast()
78 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_3x16__fma3_broadcast()
89 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_3x16__fma3_broadcast()
97 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemm_ukernel_3x16__fma3_broadcast()
109 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_3x16__fma3_broadcast()
125 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_3x16__fma3_broadcast()
D4x16-fma3-broadcast.c61 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_4x16__fma3_broadcast() local
63 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16__fma3_broadcast()
65 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16__fma3_broadcast()
67 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16__fma3_broadcast()
89 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_4x16__fma3_broadcast()
102 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_4x16__fma3_broadcast()
112 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemm_ukernel_4x16__fma3_broadcast()
128 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_4x16__fma3_broadcast()
147 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16__fma3_broadcast()
D4x16-avx-broadcast.c61 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_4x16__avx_broadcast() local
63 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16__avx_broadcast()
65 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16__avx_broadcast()
67 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16__avx_broadcast()
89 vacc0x89ABCDEF = _mm256_add_ps(vacc0x89ABCDEF, _mm256_mul_ps(va0, vb89ABCDEF)); in xnn_f32_gemm_ukernel_4x16__avx_broadcast()
102 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_gemm_ukernel_4x16__avx_broadcast()
112 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_gemm_ukernel_4x16__avx_broadcast()
128 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_gemm_ukernel_4x16__avx_broadcast()
147 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_ukernel_4x16__avx_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
D1x16s4-fma3-broadcast.c47 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() local
69 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
77 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
85 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
93 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
109 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
119 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
123 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
127 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
136 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
D3x16s4-fma3-broadcast.c55 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast() local
57 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
97 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
111 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
125 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
139 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
163 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
177 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
185 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
[all …]
D1x16-fma3-broadcast.c47 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x16__fma3_broadcast() local
69 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16__fma3_broadcast()
77 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_1x16__fma3_broadcast()
81 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_igemm_ukernel_1x16__fma3_broadcast()
85 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16__fma3_broadcast()
94 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_1x16__fma3_broadcast()
D1x16-avx-broadcast.c47 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x16__avx_broadcast() local
69 vacc0x89ABCDEF = _mm256_add_ps(vacc0x89ABCDEF, _mm256_mul_ps(va0, vb89ABCDEF)); in xnn_f32_igemm_ukernel_1x16__avx_broadcast()
77 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_1x16__avx_broadcast()
81 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_igemm_ukernel_1x16__avx_broadcast()
85 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_1x16__avx_broadcast()
94 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_1x16__avx_broadcast()
D4x16s4-fma3-broadcast.c59 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast() local
61 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
63 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
65 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
111 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
128 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
145 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
162 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
190 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
206 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
[all …]
D5x16s4-fma3-broadcast.c63 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast() local
65 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
67 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
69 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
71 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
125 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
145 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc1, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
165 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc2, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
185 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
217 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
[all …]
D3x16-fma3-broadcast.c55 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_3x16__fma3_broadcast() local
57 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_3x16__fma3_broadcast()
59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_3x16__fma3_broadcast()
95 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_3x16__fma3_broadcast()
109 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_3x16__fma3_broadcast()
117 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_igemm_ukernel_3x16__fma3_broadcast()
129 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_3x16__fma3_broadcast()
142 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_3x16__fma3_broadcast()
D3x16-avx-broadcast.c55 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_3x16__avx_broadcast() local
57 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_3x16__avx_broadcast()
59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_3x16__avx_broadcast()
95 vacc0x89ABCDEF = _mm256_add_ps(vacc0x89ABCDEF, _mm256_mul_ps(va0, vb89ABCDEF)); in xnn_f32_igemm_ukernel_3x16__avx_broadcast()
109 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_3x16__avx_broadcast()
117 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_igemm_ukernel_3x16__avx_broadcast()
129 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_3x16__avx_broadcast()
142 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_3x16__avx_broadcast()
D4x16-fma3-broadcast.c59 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_4x16__fma3_broadcast() local
61 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16__fma3_broadcast()
63 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16__fma3_broadcast()
65 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16__fma3_broadcast()
108 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEF, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_4x16__fma3_broadcast()
125 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_4x16__fma3_broadcast()
135 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_igemm_ukernel_4x16__fma3_broadcast()
151 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_4x16__fma3_broadcast()
166 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16__fma3_broadcast()
D4x16-avx-broadcast.c59 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_4x16__avx_broadcast() local
61 __m256 vacc1x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16__avx_broadcast()
63 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16__avx_broadcast()
65 __m256 vacc3x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16__avx_broadcast()
108 vacc0x89ABCDEF = _mm256_add_ps(vacc0x89ABCDEF, _mm256_mul_ps(va0, vb89ABCDEF)); in xnn_f32_igemm_ukernel_4x16__avx_broadcast()
125 vacc0x89ABCDEF = _mm256_min_ps(vacc0x89ABCDEF, vmax); in xnn_f32_igemm_ukernel_4x16__avx_broadcast()
135 vacc0x89ABCDEF = _mm256_max_ps(vacc0x89ABCDEF, vmin); in xnn_f32_igemm_ukernel_4x16__avx_broadcast()
151 _mm256_storeu_ps(c0 + 8, vacc0x89ABCDEF); in xnn_f32_igemm_ukernel_4x16__avx_broadcast()
166 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_igemm_ukernel_4x16__avx_broadcast()

12