Lines Matching refs:vacc0123p0
165 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local
172 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
178 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
184 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
190 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
196 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
202 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
208 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
214 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
220 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
226 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
232 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi10x0123, vk10x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
238 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
244 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi12x0123, vk12x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
250 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
256 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi14x0123, vk14x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
262 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
268 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
274 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
280 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi18x0123, vk18x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
286 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
292 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi20x0123, vk20x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
298 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi21x0123, vk21x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
304 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi22x0123, vk22x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
310 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi23x0123, vk23x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
316 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi24x0123, vk24x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
321 __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
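The matches above (source lines 165-321) trace the accumulator through one full channel-group computation: vacc0123p0 is initialized from the packed weights (the bias for four channels), 25 multiply-add steps each fold in one input/tap pair, and the result is clamped against vmin. Below is a minimal sketch of that pattern written as a loop rather than fully unrolled; it is not the XNNPACK source, and the helper name, the pointer-array input layout, and the bias-then-taps weight layout are assumptions made for illustration.

  #include <stddef.h>
  #include <xmmintrin.h>

  /* Sketch only: same accumulate-then-clamp pattern as the listed lines,
   * for one group of 4 channels of a 25-tap depthwise convolution. */
  static void dwconv_4ch_25tap_sketch(
      const float* const* input,  /* assumed: 25 pointers, one per tap */
      const float* w,             /* assumed: 4 bias floats, then 25 groups of 4 tap weights */
      float* output,
      __m128 vmin,
      __m128 vmax)
  {
    __m128 vacc0123p0 = _mm_load_ps(w);                /* bias load, cf. source line 165 */
    for (size_t k = 0; k < 25; k++) {
      const __m128 vi0123 = _mm_loadu_ps(input[k]);    /* 4 input channels for tap k */
      const __m128 vk0123 = _mm_load_ps(w + 4 + k * 4);
      vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0123, vk0123));
    }
    __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin);    /* lower clamp, cf. source line 321 */
    vacc0123 = _mm_min_ps(vacc0123, vmax);             /* upper clamp; not part of the vacc0123p0 matches */
    _mm_storeu_ps(output, vacc0123);
  }

The real microkernel unrolls all 25 taps, which is why the matches show 25 distinct viNx0123/vkNx0123 pairs instead of a loop.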
328 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local
332 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
336 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
340 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
344 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
348 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
352 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
356 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
360 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
364 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
368 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
372 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi10x0123, vk10x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
376 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
380 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi12x0123, vk12x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
384 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
388 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi14x0123, vk14x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
392 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
396 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
400 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
404 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi18x0123, vk18x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
408 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
412 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi20x0123, vk20x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
416 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi21x0123, vk21x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
420 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi22x0123, vk22x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
424 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi23x0123, vk23x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
428 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi24x0123, vk24x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
431 __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
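The second group of matches (source lines 328-431) is a second local instance of the same accumulator, most likely the remainder path for the last 1-3 channels of the row: it repeats the same 25 multiply-add steps before what would be a partial store. The listing itself does not show how that tail is written out, so the snippet below is only a hypothetical sketch of such a partial store, not code taken from the file; the function name and the parameter c (the number of remaining channels) are assumptions.

  #include <stddef.h>
  #include <xmmintrin.h>

  /* Hypothetical sketch: store only the first c (1-3) lanes of a clamped
   * accumulator, as a remainder path might do after the accumulation above. */
  static void store_partial_sketch(float* output, __m128 vacc0123, size_t c)
  {
    if (c & 2) {
      _mm_storel_pi((__m64*) output, vacc0123);        /* write lanes 0-1 */
      vacc0123 = _mm_movehl_ps(vacc0123, vacc0123);    /* shift lanes 2-3 down to 0-1 */
      output += 2;
    }
    if (c & 1) {
      _mm_store_ss(output, vacc0123);                  /* write the remaining lane */
    }
  }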