Home
last modified time | relevance | path

Searched refs:_mm256_load_ps (Results 1 – 25 of 104) sorted by relevance

12345

/external/XNNPACK/src/f32-dwconv/gen/
Dup16x25-fma3.c90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x25__fma3()
91 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x25__fma3()
98 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x25__fma3()
99 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x25__fma3()
107 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x25__fma3()
108 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x25__fma3()
116 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x25__fma3()
117 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x25__fma3()
125 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x25__fma3()
126 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x25__fma3()
[all …]
Dup16x25-fma3-acc2.c90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
91 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
98 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
99 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
107 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
108 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
116 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
117 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
125 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
126 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2()
[all …]
Dup16x25-avx-acc2.c90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
91 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
98 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
99 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
107 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
108 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
116 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
117 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
125 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
126 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2()
[all …]
Dup16x25-avx.c90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x25__avx()
91 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x25__avx()
98 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x25__avx()
99 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x25__avx()
107 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x25__avx()
108 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x25__avx()
116 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x25__avx()
117 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x25__avx()
125 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x25__avx()
126 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x25__avx()
[all …]
Dup8x25-fma3-acc2.c90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
96 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
102 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
108 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
114 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
126 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
132 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
138 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
144 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2()
[all …]
Dup8x25-fma3.c90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__fma3()
96 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__fma3()
102 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__fma3()
108 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__fma3()
114 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x25__fma3()
120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__fma3()
126 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x25__fma3()
132 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x25__fma3()
138 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x25__fma3()
144 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__fma3()
[all …]
Dup16x9-fma3-acc2.c58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
59 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
66 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
67 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
75 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
76 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
84 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
85 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
93 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
94 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2()
[all …]
Dup16x9-fma3.c58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x9__fma3()
59 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x9__fma3()
66 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x9__fma3()
67 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x9__fma3()
75 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x9__fma3()
76 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x9__fma3()
84 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x9__fma3()
85 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x9__fma3()
93 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x9__fma3()
94 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x9__fma3()
[all …]
Dup8x25-avx.c90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__avx()
96 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__avx()
102 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__avx()
108 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__avx()
114 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x25__avx()
120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__avx()
126 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x25__avx()
132 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x25__avx()
138 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x25__avx()
144 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__avx()
[all …]
Dup8x25-avx-acc2.c90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
96 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
102 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
108 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
114 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
126 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
132 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
138 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
144 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2()
[all …]
Dup16x9-avx-acc2.c58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
59 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
66 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
67 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
75 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
76 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
84 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
85 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
93 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
94 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2()
[all …]
Dup16x9-avx.c58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x9__avx()
59 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x9__avx()
66 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x9__avx()
67 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x9__avx()
75 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x9__avx()
76 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x9__avx()
84 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x9__avx()
85 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x9__avx()
93 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x9__avx()
94 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x9__avx()
[all …]
Dup16x4-fma3.c48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x4__fma3()
49 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x4__fma3()
56 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x4__fma3()
57 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x4__fma3()
65 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x4__fma3()
66 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x4__fma3()
74 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x4__fma3()
75 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x4__fma3()
83 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x4__fma3()
84 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x4__fma3()
[all …]
Dup16x4-avx-acc2.c48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
49 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
56 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
57 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
65 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
66 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
74 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
75 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
83 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
84 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2()
[all …]
Dup16x4-avx.c48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x4__avx()
49 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x4__avx()
56 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x4__avx()
57 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x4__avx()
65 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x4__avx()
66 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x4__avx()
74 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x4__avx()
75 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x4__avx()
83 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x4__avx()
84 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x4__avx()
[all …]
Dup8x9-fma3-acc2.c58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
64 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
70 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
76 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
82 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
94 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
100 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
106 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
112 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2()
[all …]
Dup16x4-fma3-acc2.c48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
49 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
56 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
57 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
65 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
66 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
74 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
75 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
83 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
84 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2()
[all …]
Dup8x9-fma3.c58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__fma3()
64 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__fma3()
70 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__fma3()
76 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__fma3()
82 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x9__fma3()
88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__fma3()
94 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x9__fma3()
100 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x9__fma3()
106 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x9__fma3()
112 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__fma3()
[all …]
Dup8x9-avx.c58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__avx()
64 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__avx()
70 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__avx()
76 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__avx()
82 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x9__avx()
88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__avx()
94 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x9__avx()
100 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x9__avx()
106 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x9__avx()
112 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__avx()
[all …]
Dup8x9-avx-acc2.c58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
64 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
70 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
76 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
82 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
94 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
100 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
106 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
112 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2()
[all …]
Dup8x4-fma3.c48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__fma3()
54 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x4__fma3()
60 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x4__fma3()
66 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x4__fma3()
72 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x4__fma3()
89 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__fma3()
92 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x4__fma3()
96 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x4__fma3()
100 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x4__fma3()
104 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x4__fma3()
/external/XNNPACK/src/f32-gemm/gen-inc/
D1x16s4-fma3-broadcast.c44 __m256 vacc0x01234567 = _mm256_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
45 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
54 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
55 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
62 const __m256 vb01234567c1 = _mm256_load_ps(w + 16); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
63 const __m256 vb89ABCDEFc1 = _mm256_load_ps(w + 24); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
70 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
71 const __m256 vb89ABCDEFc2 = _mm256_load_ps(w + 40); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
78 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
79 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
[all …]
D3x16s4-fma3-broadcast.c56 __m256 vacc0x01234567 = _mm256_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
57 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
58 __m256 vacc1x01234567 = _mm256_load_ps(acc + 16); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
59 __m256 vacc1x89ABCDEF = _mm256_load_ps(acc + 24); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
60 __m256 vacc2x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
61 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
74 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
75 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
88 const __m256 vb01234567c1 = _mm256_load_ps(w + 16); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
89 const __m256 vb89ABCDEFc1 = _mm256_load_ps(w + 24); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
D1x16s4-fma3-broadcast.c42 __m256 vacc0x01234567 = _mm256_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
43 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
52 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
53 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
60 const __m256 vb01234567c1 = _mm256_load_ps(w + 16); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
61 const __m256 vb89ABCDEFc1 = _mm256_load_ps(w + 24); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
68 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
69 const __m256 vb89ABCDEFc2 = _mm256_load_ps(w + 40); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
76 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
77 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
[all …]
/external/XNNPACK/src/f32-igemm/gen/
D1x16s4-fma3-broadcast.c46 __m256 vacc0x01234567 = _mm256_load_ps(w); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
47 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
65 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
66 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
73 const __m256 vb01234567c1 = _mm256_load_ps(w + 16); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
74 const __m256 vb89ABCDEFc1 = _mm256_load_ps(w + 24); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
81 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
82 const __m256 vb89ABCDEFc2 = _mm256_load_ps(w + 40); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
89 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
90 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
[all …]

12345