
Searched refs: _mm_castps_si128 (Results 1 – 25 of 295), sorted by relevance
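
Note: _mm_castps_si128 (SSE2, <emmintrin.h>) reinterprets the 128 bits of a float vector (__m128) as an integer vector (__m128i); it converts no values and emits no instruction, and _mm_castsi128_ps is the inverse. The hits below all use it to apply integer operations to float data (or vice versa). A minimal standalone sketch, not taken from any of the indexed files:

// Minimal standalone sketch (not from the indexed sources): _mm_castps_si128
// reinterprets the 128 bits of a float vector as an integer vector.  It is a
// compile-time cast and generates no instruction; _mm_castsi128_ps goes back.
#include <emmintrin.h>  // SSE2
#include <stdio.h>

int main(void) {
  const __m128 vf = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
  const __m128i vbits = _mm_castps_si128(vf);   // same bits, integer "view"

  unsigned int u[4];
  _mm_storeu_si128((__m128i*)u, vbits);
  // 1.0f is 0x3f800000, 2.0f is 0x40000000, and so on.
  for (int i = 0; i < 4; i++) {
    printf("lane %d: 0x%08x\n", i, u[i]);
  }
  return 0;
}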


/external/XNNPACK/src/f32-dwconv2d-chw/gen/
3x3p1-minmax-ssse3-2x4.c
100 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
101 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
102 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
103 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
117 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
118 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
119 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
120 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
162 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
163 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
[all …]
3x3p1-minmax-ssse3-6x4.c
156 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
157 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
158 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
159 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
160 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
161 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
162 …st __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
163 …st __m128 vi7x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi7x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
193 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
194 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
[all …]
3x3p1-minmax-ssse3-1x4.c
86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
130 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
131 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
132 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
139 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
[all …]
3x3p1-minmax-ssse3-4x4.c
128 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
129 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
130 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
131 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
132 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
133 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
155 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
156 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
157 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
158 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
[all …]
3x3p1-minmax-ssse3-3x4.c
114 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
115 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
116 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
117 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
118 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
136 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
137 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
138 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
139 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
140 …st __m128 vi4x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
[all …]
3x3p1-minmax-ssse3-5x4.c
142 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
143 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
144 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
145 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
146 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
147 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
148 …st __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
174 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
175 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
176 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
[all …]
3x3p1-minmax-ssse3-1x4-acc2.c
86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
131 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
132 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
133 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
140 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
[all …]
3x3p1-minmax-ssse3-1x4-acc3.c
86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
132 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
133 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
134 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
141 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
[all …]
3x3p1-minmax-ssse3-1x4-acc4.c
86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
133 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
134 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
135 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
142 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
[all …]
3x3p1-minmax-ssse3-2x4-acc2.c
100 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
101 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
102 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
103 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
117 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
118 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
119 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
120 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
164 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
165 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
[all …]
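
Note: the dwconv2d-chw SSSE3 kernels above build shifted 4-float windows (e.g. vi0x3456 out of vi0x0123 and vi0x4567) with _mm_alignr_epi8, which only operates on integer vectors, hence the casts in both directions. A minimal sketch of that pattern; the variable names and the 12-byte offset are illustrative, since the hits are truncated before the offset argument:

// Hedged sketch of the window-shift pattern used above.  SSSE3's
// _mm_alignr_epi8 only exists for integer vectors, so the float inputs are
// cast to __m128i, shifted as bytes, and cast back.
#include <tmmintrin.h>  // SSSE3
#include <stdio.h>

int main(void) {
  // Two adjacent 4-float windows of a row: x[0..3] and x[4..7].
  const __m128 vx0123 = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
  const __m128 vx4567 = _mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f);

  // Concatenate {vx4567:vx0123} and shift right by 12 bytes (3 floats),
  // producing the window x[3..6] without reloading from memory.
  const __m128 vx3456 = _mm_castsi128_ps(
      _mm_alignr_epi8(_mm_castps_si128(vx4567), _mm_castps_si128(vx0123), 12));

  float out[4];
  _mm_storeu_ps(out, vx3456);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 3 4 5 6
  return 0;
}
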
/external/XNNPACK/src/f32-argmaxpool/
9p8x-sse2-c4.c
78 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
82 const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
86 const __m128i vm3 = _mm_castps_si128(_mm_cmpgt_ps(vi3, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
90 const __m128i vm4 = _mm_castps_si128(_mm_cmpgt_ps(vi4, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
94 const __m128i vm5 = _mm_castps_si128(_mm_cmpgt_ps(vi5, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
98 const __m128i vm6 = _mm_castps_si128(_mm_cmpgt_ps(vi6, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
102 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
106 const __m128i vm8 = _mm_castps_si128(_mm_cmpgt_ps(vi8, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
163 const __m128i vm0 = _mm_castps_si128(_mm_cmpgt_ps(vi0, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
167 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
[all …]
9x-sse2-c4.c
97 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
101 const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
105 const __m128i vm3 = _mm_castps_si128(_mm_cmpgt_ps(vi3, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
109 const __m128i vm4 = _mm_castps_si128(_mm_cmpgt_ps(vi4, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
113 const __m128i vm5 = _mm_castps_si128(_mm_cmpgt_ps(vi5, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
117 const __m128i vm6 = _mm_castps_si128(_mm_cmpgt_ps(vi6, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
121 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
125 const __m128i vm8 = _mm_castps_si128(_mm_cmpgt_ps(vi8, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
148 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
152 const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
[all …]
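
Note: the argmaxpool kernels compare candidates against the running maximum in the float domain and reuse the resulting all-ones/all-zeros lanes as an integer mask when updating the index vector. A minimal sketch of that pattern with made-up names and simplified index handling:

// Hedged sketch of the compare-and-select pattern seen in the argmaxpool hits
// (index handling is simplified; names are illustrative, not XNNPACK's).
#include <emmintrin.h>  // SSE2
#include <stdio.h>

int main(void) {
  __m128 vmax  = _mm_set_ps(5.0f, 1.0f, 7.0f, 2.0f);     // running maxima
  __m128i vidx = _mm_set1_epi32(0);                       // index of current max
  const __m128 vi1 = _mm_set_ps(4.0f, 9.0f, 3.0f, 8.0f);  // candidates (row 1)
  const __m128i vone = _mm_set1_epi32(1);                 // candidate's index

  // All-ones lanes where the candidate beats the current max, reinterpreted
  // as an integer mask so it can drive integer blends on the index vector.
  const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax));

  // Update max and index with and/andnot/or blends (pre-SSE4.1 style).
  vmax = _mm_max_ps(vmax, vi1);
  vidx = _mm_or_si128(_mm_and_si128(vm1, vone), _mm_andnot_si128(vm1, vidx));

  float m[4];
  int idx[4];
  _mm_storeu_ps(m, vmax);
  _mm_storeu_si128((__m128i*)idx, vidx);
  for (int i = 0; i < 4; i++) printf("lane %d: max=%g idx=%d\n", i, m[i], idx[i]);
  return 0;
}
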
/external/XNNPACK/src/x8-zip/
x3-sse2.c
53 const __m128i vtemp0 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
56 const __m128i vtemp1 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
59 const __m128i vtemp2 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
63 const __m128i vxyz0 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
66 const __m128i vxyz1 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
69 const __m128i vxyz2 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
102 const __m128i vtemp0 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
105 const __m128i vtemp1 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
108 const __m128i vtemp2 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
112 const __m128i vxyz0 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2()
[all …]
/external/webrtc/common_audio/third_party/ooura/fft_size_128/
ooura_fft_sse2.cc
30 static __inline __m128i _mm_castps_si128(__m128 a) { in _mm_castps_si128() function
137 _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); in cftmdl_128_SSE2()
150 _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); in cftmdl_128_SSE2()
153 _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2))); in cftmdl_128_SSE2()
155 _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1)); in cftmdl_128_SSE2()
158 _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3))); in cftmdl_128_SSE2()
161 _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); in cftmdl_128_SSE2()
162 _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub)); in cftmdl_128_SSE2()
164 _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4)); in cftmdl_128_SSE2()
167 _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3))); in cftmdl_128_SSE2()
[all …]
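
Note: here ooura_fft_sse2.cc defines its own _mm_castps_si128 (the hit at line 30 shows only the signature), presumably as a shim for toolchains whose headers lack the cast intrinsics. The body is not visible in the truncated hit; a union-based reinterpretation, shown below under a distinct name so it also builds against modern headers, is one common way to write such a fallback:

// Hedged sketch of a fallback cast shim.  The real body and compiler guard in
// ooura_fft_sse2.cc are not visible in the truncated hit and are assumptions.
#include <emmintrin.h>
#include <stdio.h>

static inline __m128i castps_si128_fallback(__m128 a) {
  union { __m128 f; __m128i i; } u;  // reinterpret the same 128 bits
  u.f = a;
  return u.i;
}

int main(void) {
  unsigned int bits[4];
  _mm_storeu_si128((__m128i*)bits, castps_si128_fallback(_mm_set1_ps(1.0f)));
  printf("0x%08x\n", bits[0]);  // 0x3f800000
  return 0;
}
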
/external/XNNPACK/src/f32-sigmoid/gen/
avx-rr2-p5-div-x80.c
76 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
77 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
79 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
80 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
82 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
83 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
85 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
86 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
88 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
89 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
[all …]
avx-rr2-p5-div-x72.c
73 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
74 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
76 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
77 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
79 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
80 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
82 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
83 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
85 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
86 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
[all …]
avx-rr2-p5-div-x64.c
70 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
71 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
73 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
74 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
76 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
77 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
79 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
80 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
82 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
83 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
[all …]
avx-rr2-p5-div-x56.c
67 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
68 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
70 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
71 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
73 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
74 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
76 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
77 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
79 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
80 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
[all …]
sse2-p5-div-x24.c
61 const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
62 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
63 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
64 const __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
65 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
66 const __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
152 …const __m128 vm0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx012… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
153 …const __m128 vm4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx456… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
154 …const __m128 vm89AB = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx89A… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
155 …const __m128 vmCDEF = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxCDE… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
[all …]
sse2-p5-div-x20.c
58 const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
59 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
60 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
61 const __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
62 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
136 …const __m128 vm0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx012… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
137 …const __m128 vm4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx456… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
138 …const __m128 vm89AB = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx89A… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
139 …const __m128 vmCDEF = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxCDE… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
140 …const __m128 vmGHIJ = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxGHI… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
[all …]
avx-rr2-p5-div-x48.c
64 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
65 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
67 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
68 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
70 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
71 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
73 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
74 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
76 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
77 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
[all …]
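
Note: the sigmoid kernels above rebuild 2^n from the rounded value vn with a single shift: after adding a "magic bias", the low mantissa bits of vn already hold the biased exponent of 2^n, and _mm_slli_epi32(..., 23) moves them into the exponent field. A minimal sketch; the bias constant is the standard choice for this formulation, not a value copied from the XNNPACK sources:

// Hedged sketch of the exponent-reconstruction trick in the sigmoid kernels.
#include <emmintrin.h>  // SSE2
#include <stdio.h>

int main(void) {
  // Want vs = 2^n for n = round(vz), with |n| well below 126.
  const __m128 vz = _mm_set_ps(3.0f, -2.0f, 1.0f, 0.0f);

  // Magic bias 0x1.8000FEp23f = 2^23 + 2^22 + 127: adding it snaps vn to an
  // integer and leaves (n + 127) in the low mantissa bits.
  const __m128 vmagic_bias = _mm_set1_ps(0x1.8000FEp23f);
  const __m128 vn = _mm_add_ps(vz, vmagic_bias);

  // Shift (n + 127) into the exponent field: the result is exactly 2^n.
  const __m128 vs = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn), 23));

  float s[4];
  _mm_storeu_ps(s, vs);
  printf("%g %g %g %g\n", s[0], s[1], s[2], s[3]);  // 1 2 0.25 8
  return 0;
}
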
/external/XNNPACK/src/f32-prelu/gen/
sse2-2x8.c
59 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
61 …8 vmask0x4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x4567))); in xnn_f32_prelu_ukernel__sse2_2x8()
63 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
65 …8 vmask1x4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x4567))); in xnn_f32_prelu_ukernel__sse2_2x8()
89 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
91 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
111 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
113 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
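
Note: the PReLU kernels (and the sigmoid/ELU kernels elsewhere in these results) derive a per-lane sign mask by comparing the raw float bits against zero as signed 32-bit integers: the result is all-ones exactly where the IEEE sign bit is set. A minimal sketch with illustrative names and slope:

// Hedged sketch of the sign-mask pattern: an integer compare on the raw bits
// marks the lanes whose sign bit is set (negative values and -0.0f) without a
// separate floating-point comparison.  Names and the slope are illustrative.
#include <emmintrin.h>  // SSE2
#include <stdio.h>

int main(void) {
  const __m128 vx = _mm_set_ps(2.0f, -3.0f, 0.5f, -1.0f);
  const __m128 vslope = _mm_set1_ps(0.25f);

  // Lanes where the IEEE sign bit of vx is set.
  const __m128 vmask = _mm_castsi128_ps(
      _mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx)));

  // Blend: slope * x where x is negative, x otherwise (SSE2 and/andnot/or).
  const __m128 vneg = _mm_mul_ps(vx, vslope);
  const __m128 vy = _mm_or_ps(_mm_and_ps(vmask, vneg),
                              _mm_andnot_ps(vmask, vx));

  float y[4];
  _mm_storeu_ps(y, vy);
  printf("%g %g %g %g\n", y[0], y[1], y[2], y[3]);  // -0.25 0.5 -0.75 2
  return 0;
}
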
/external/XNNPACK/src/f32-velu/gen/
velu-sse2-rr2-p6-x24.c
68 __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
69 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
70 __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
71 __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
72 __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
73 __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
158 …const __m128 vm0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx012… in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
160 …const __m128 vm4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx456… in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
162 …const __m128 vm89AB = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx89A… in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
164 …const __m128 vmCDEF = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxCDE… in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
5x8-minmax-sse2-dup.c
92 …const __m128 va0c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
93 …const __m128 va1c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
94 …const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
95 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
96 …const __m128 va4c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va4), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
112 …const __m128 va0c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
113 …const __m128 va1c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
114 …const __m128 va2c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
115 …const __m128 va3c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
116 …const __m128 va4c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va4), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup()
[all …]
/external/XNNPACK/src/f32-gemm/gen-inc/
5x8inc-minmax-sse2-dup.c
94 …const __m128 va0c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
95 …const __m128 va1c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
96 …const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
97 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
98 …const __m128 va4c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va4), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
114 …const __m128 va0c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
115 …const __m128 va1c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
116 …const __m128 va2c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
117 …const __m128 va3c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
118 …const __m128 va4c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va4), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup()
[all …]
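
Note: the sse2-dup GEMM kernels broadcast one 32-bit lane of the A register with pshufd (_mm_shuffle_epi32) rather than shufps, which is why the float register is cast to __m128i, shuffled, and cast back. A minimal sketch; the names are illustrative, and the reason for preferring pshufd is not spelled out in the snippets above:

// Hedged sketch of the broadcast pattern in the sse2-dup GEMM kernels.
#include <emmintrin.h>  // SSE2
#include <stdio.h>

int main(void) {
  const __m128 va = _mm_set_ps(3.5f, 2.5f, 1.5f, 0.5f);  // lanes 0..3

  // Broadcast lane 1 of va into every lane (the kernels do this for lanes
  // 0..3 in turn and multiply each broadcast against a row of B).
  const __m128 va_c1111 = _mm_castsi128_ps(
      _mm_shuffle_epi32(_mm_castps_si128(va), _MM_SHUFFLE(1, 1, 1, 1)));

  float out[4];
  _mm_storeu_ps(out, va_c1111);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1.5 1.5 1.5 1.5
  return 0;
}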
