Lines Matching refs:vo0p0
132 __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4() local
133 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x8ACE, vk12)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
134 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x8ACE, vk22)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
135 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x8ACE, vk32)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
136 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x8ACE, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
144 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x9BDF, vk03)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
145 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x9BDF, vk13)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
146 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x9BDF, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
147 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x9BDF, vk33)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
148 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x9BDF, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
167 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
168 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x68AC, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
169 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
170 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
171 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
216 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x79BD, vk01)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
217 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
218 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x79BD, vk21)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
219 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
220 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x79BD, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
239 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0xACEG, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
240 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1xACEG, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
241 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2xACEG, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
242 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3xACEG, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
243 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4xACEG, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
246 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
268 __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4() local
269 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x8ACE, vk12)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
270 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x8ACE, vk22)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
271 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x8ACE, vk32)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
272 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x8ACE, vk42)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
280 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x9BDF, vk03)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
281 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x9BDF, vk13)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
282 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x9BDF, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
283 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x9BDF, vk33)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
284 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x9BDF, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
298 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
299 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x68AC, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
300 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
301 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
302 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
310 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x79BD, vk01)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
311 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
312 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x79BD, vk21)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
313 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
314 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x79BD, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
329 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0xACEG, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
330 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1xACEG, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
331 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2xACEG, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
332 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi3xACEG, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
333 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4xACEG, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
336 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()