Lines Matching refs:m4
83 psubw m4, m0 ; q4-q0
87 ABS2 m4, m5, m2, m3 ; abs(q4-q0) | abs(q5-q0)
89 pcmpgtw m4, reg_F ; abs(q4-q0) > F
93 por m5, m4
101 psubw m4, m3, m0 ; q3-q0
103 ABS2 m4, m5, m6, m7 ; abs(q3-q0) | abs(q2-q0)
104 pcmpgtw m4, reg_F ; abs(q3-q0) > F
113 por m4, m5
124 por m4, m6
306 movu m4, [dst4q+strideq*0-8]
322 mova [%%q0], m4
345 mova m4, [dst4q+strideq*0-16]
363 mova [%%p3], m4
373 mova m4, [dst4q+strideq*0]
391 mova [%%q4], m4
405 mova m4, [%%q4]
424 ; m4-7=free
444 mova m4, [%%p1]
448 psubw m6, m4, m1 ; q1-p1
477 mova m4, [%%p4]
501 por m4, reg_F8I
505 por m4, m2 ; !flat8|!fm
507 por m5, m4, reg_F8O ; !flat16|!fm
508 pandn m2, m4 ; filter4_mask
509 pandn m4, m5 ; filter8_mask
513 pandn m2, m4 ; filter4_mask
514 pxor m4, [pw_m1] ; filter8_mask
544 psllw m4, m2, 3
546 paddw m4, m6
548 paddw m4, reg_P3
550 paddw m4, m1
552 paddw m4, reg_Q0 ; q0+p1+p3+p5+p7*8
554 paddw m4, [pw_8]
555 paddw m5, m4 ; q0+p0+p1+p2+p3+p4+p5+p6*2+p7*7+8
561 FILTER_STEP m4, m5, F16M, 4, %%p6, m3, m2, m6, reg_Q1
566 FILTER_STEP m4, m5, F16M, 4, %%p5, m6, m2, m7, m3
571 FILTER_STEP m4, m5, F16M, 4, %%p4, m7, m2, reg_P3, m6
576 mova m4, reg_P3
577 mova [rsp+3*mmsize], m4
579 FILTER_STEP m4, m5, F16M, 4, %%p3, reg_P3, m2, reg_P2, m7
584 mova m4, reg_P2
585 mova [rsp+4*mmsize], m4
587 FILTER_STEP m4, m5, F16M, 4, %%p2, reg_P2, m2, m1, reg_Q5
590 FILTER_STEP m4, m5, F16M, 4, %%p1, m1, m2, m0, reg_Q6
592 FILTER_STEP m4, m5, F16M, 4, %%p0, m0, m2, reg_Q0, m1, 1
593 FILTER_STEP m4, m5, F16M, 4, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m1, ARCH_X86_64
594 FILTER_STEP m4, m5, F16M, 4, %%q1, reg_Q1, [rsp+1*mmsize], m3, m1, ARCH_X86_64
595 FILTER_STEP m4, m5, F16M, 4, %%q2, m3, [rsp+2*mmsize], m6, m1, 1
596 FILTER_STEP m4, m5, F16M, 4, %%q3, m6, [rsp+3*mmsize], m7, m1
597 FILTER_STEP m4, m5, F16M, 4, %%q4, m7, [rsp+4*mmsize], reg_Q5, m1
598 FILTER_STEP m4, m5, F16M, 4, %%q5, reg_Q5, [rsp+5*mmsize], reg_Q6, m1
599 FILTER_STEP m4, m5, F16M, 4, %%q6, reg_Q6
614 ; m4-5=free
620 psllw m4, m2, 2
622 paddw m4, m7
624 paddw m4, m0
626 paddw m4, [pw_4]
627 paddw m5, m4
637 FILTER_STEP m4, m5, F8M, 3, %%p2, m1, m2, m7, reg_Q1
639 FILTER_STEP m4, m5, F8M, 3, %%p2, m1, m2, m7, reg_Q1, 1
641 FILTER_STEP m4, m5, F8M, 3, %%p1, m7, m2, m0, m3, 1
642 FILTER_STEP m4, m5, F8M, 3, %%p0, m0, m2, reg_Q0, m6, 1
644 FILTER_STEP m4, m5, F8M, 3, %%q0, reg_Q0, m8, reg_Q1, m6, ARCH_X86_64
645 FILTER_STEP m4, m5, F8M, 3, %%q1, reg_Q1, m9, m3, m6, ARCH_X86_64
647 FILTER_STEP m4, m5, F8M, 3, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m6, ARCH_X86_64
648 FILTER_STEP m4, m5, F8M, 3, %%q1, reg_Q1, [rsp+1*mmsize], m3, m6, ARCH_X86_64
650 FILTER_STEP m4, m5, F8M, 3, %%q2, m3
665 ; m4-5=free
670 psubw m4, m7, m6 ; p1-q1
672 pand m4, m3
673 pminsw m4, [pw_ %+ %%maxsgn]
674 pmaxsw m4, [pw_ %+ %%minsgn] ; clip_intp2(p1-q1, 9) -> f
675 paddw m4, m5
677 paddw m4, m5 ; 3*(q0-p0)+f
678 pminsw m4, [pw_ %+ %%maxsgn]
679 pmaxsw m4, [pw_ %+ %%minsgn] ; clip_intp2(3*(q0-p0)+f, 9) -> f
680 pand m4, reg_F4M
681 paddw m5, m4, [pw_4]
682 paddw m4, [pw_3]
684 pminsw m4, [pw_ %+ %%maxsgn]
686 psraw m4, 3 ; min_intp2(f+3, 9)>>3 -> f2
688 paddw m0, m4 ; p0+f2
690 pxor m4, m4
692 pmaxsw m2, m4
693 pmaxsw m0, m4
704 pmaxsw m7, m4
705 pmaxsw m6, m4
728 mova m4, [%%q2]
744 movu [dst4q+strideq*2-8], m4
751 mova m4, [%%p5]
764 mova [dst0q+strideq*2-16], m4
778 mova m4, [%%q5]
797 mova [dst4q+strideq*1], m4