Lines Matching refs:vacc0123p0
165 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() local
175 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
184 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
193 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
202 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
211 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
220 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
229 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
238 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
247 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
256 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
265 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi10x0123, vk10x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
274 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
283 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi12x0123, vk12x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
292 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
301 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi14x0123, vk14x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
310 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
319 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
328 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
337 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi18x0123, vk18x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
346 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
355 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi20x0123, vk20x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
364 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi21x0123, vk21x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
373 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi22x0123, vk22x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
382 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi23x0123, vk23x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
391 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi24x0123, vk24x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
397 __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
407 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() local
413 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
419 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
425 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
431 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
437 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
443 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
449 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
455 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
461 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
467 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
473 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi10x0123, vk10x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
479 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
485 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi12x0123, vk12x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
491 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
497 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi14x0123, vk14x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
503 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
509 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
515 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
521 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi18x0123, vk18x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
527 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
533 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi20x0123, vk20x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
539 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi21x0123, vk21x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
545 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi22x0123, vk22x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
551 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi23x0123, vk23x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
557 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi24x0123, vk24x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
562 __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
569 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() local
573 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
577 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
581 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
585 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
589 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
593 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
597 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
601 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
605 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
609 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
613 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi10x0123, vk10x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
617 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
621 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi12x0123, vk12x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
625 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
629 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi14x0123, vk14x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
633 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
637 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
641 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
645 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi18x0123, vk18x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
649 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
653 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi20x0123, vk20x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
657 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi21x0123, vk21x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
661 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi22x0123, vk22x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
665 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi23x0123, vk23x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
669 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi24x0123, vk24x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
672 __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()