• Home
  • Raw
  • Download

Lines Matching refs:__m128i

29 static void ConvertYUV444ToRGB(const __m128i* const Y0,  in ConvertYUV444ToRGB()
30 const __m128i* const U0, in ConvertYUV444ToRGB()
31 const __m128i* const V0, in ConvertYUV444ToRGB()
32 __m128i* const R, in ConvertYUV444ToRGB()
33 __m128i* const G, in ConvertYUV444ToRGB()
34 __m128i* const B) { in ConvertYUV444ToRGB()
35 const __m128i k19077 = _mm_set1_epi16(19077); in ConvertYUV444ToRGB()
36 const __m128i k26149 = _mm_set1_epi16(26149); in ConvertYUV444ToRGB()
37 const __m128i k14234 = _mm_set1_epi16(14234); in ConvertYUV444ToRGB()
39 const __m128i k33050 = _mm_set1_epi16((short)33050); in ConvertYUV444ToRGB()
40 const __m128i k17685 = _mm_set1_epi16(17685); in ConvertYUV444ToRGB()
41 const __m128i k6419 = _mm_set1_epi16(6419); in ConvertYUV444ToRGB()
42 const __m128i k13320 = _mm_set1_epi16(13320); in ConvertYUV444ToRGB()
43 const __m128i k8708 = _mm_set1_epi16(8708); in ConvertYUV444ToRGB()
45 const __m128i Y1 = _mm_mulhi_epu16(*Y0, k19077); in ConvertYUV444ToRGB()
47 const __m128i R0 = _mm_mulhi_epu16(*V0, k26149); in ConvertYUV444ToRGB()
48 const __m128i R1 = _mm_sub_epi16(Y1, k14234); in ConvertYUV444ToRGB()
49 const __m128i R2 = _mm_add_epi16(R1, R0); in ConvertYUV444ToRGB()
51 const __m128i G0 = _mm_mulhi_epu16(*U0, k6419); in ConvertYUV444ToRGB()
52 const __m128i G1 = _mm_mulhi_epu16(*V0, k13320); in ConvertYUV444ToRGB()
53 const __m128i G2 = _mm_add_epi16(Y1, k8708); in ConvertYUV444ToRGB()
54 const __m128i G3 = _mm_add_epi16(G0, G1); in ConvertYUV444ToRGB()
55 const __m128i G4 = _mm_sub_epi16(G2, G3); in ConvertYUV444ToRGB()
58 const __m128i B0 = _mm_mulhi_epu16(*U0, k33050); in ConvertYUV444ToRGB()
59 const __m128i B1 = _mm_adds_epu16(B0, Y1); in ConvertYUV444ToRGB()
60 const __m128i B2 = _mm_subs_epu16(B1, k17685); in ConvertYUV444ToRGB()
69 static WEBP_INLINE __m128i Load_HI_16(const uint8_t* src) { in Load_HI_16()
70 const __m128i zero = _mm_setzero_si128(); in Load_HI_16()
71 return _mm_unpacklo_epi8(zero, _mm_loadl_epi64((const __m128i*)src)); in Load_HI_16()
75 static WEBP_INLINE __m128i Load_UV_HI_8(const uint8_t* src) { in Load_UV_HI_8()
76 const __m128i zero = _mm_setzero_si128(); in Load_UV_HI_8()
77 const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src); in Load_UV_HI_8()
78 const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0); in Load_UV_HI_8()
86 __m128i* const R, __m128i* const G, __m128i* const B) { in YUV444ToRGB()
87 const __m128i Y0 = Load_HI_16(y), U0 = Load_HI_16(u), V0 = Load_HI_16(v); in YUV444ToRGB()
95 __m128i* const R, __m128i* const G, __m128i* const B) { in YUV420ToRGB()
96 const __m128i Y0 = Load_HI_16(y), U0 = Load_UV_HI_8(u), V0 = Load_UV_HI_8(v); in YUV420ToRGB()
101 static WEBP_INLINE void PackAndStore4(const __m128i* const R, in PackAndStore4()
102 const __m128i* const G, in PackAndStore4()
103 const __m128i* const B, in PackAndStore4()
104 const __m128i* const A, in PackAndStore4()
106 const __m128i rb = _mm_packus_epi16(*R, *B); in PackAndStore4()
107 const __m128i ga = _mm_packus_epi16(*G, *A); in PackAndStore4()
108 const __m128i rg = _mm_unpacklo_epi8(rb, ga); in PackAndStore4()
109 const __m128i ba = _mm_unpackhi_epi8(rb, ga); in PackAndStore4()
110 const __m128i RGBA_lo = _mm_unpacklo_epi16(rg, ba); in PackAndStore4()
111 const __m128i RGBA_hi = _mm_unpackhi_epi16(rg, ba); in PackAndStore4()
112 _mm_storeu_si128((__m128i*)(dst + 0), RGBA_lo); in PackAndStore4()
113 _mm_storeu_si128((__m128i*)(dst + 16), RGBA_hi); in PackAndStore4()
117 static WEBP_INLINE void PackAndStore4444(const __m128i* const R, in PackAndStore4444()
118 const __m128i* const G, in PackAndStore4444()
119 const __m128i* const B, in PackAndStore4444()
120 const __m128i* const A, in PackAndStore4444()
123 const __m128i rg0 = _mm_packus_epi16(*R, *G); in PackAndStore4444()
124 const __m128i ba0 = _mm_packus_epi16(*B, *A); in PackAndStore4444()
126 const __m128i rg0 = _mm_packus_epi16(*B, *A); in PackAndStore4444()
127 const __m128i ba0 = _mm_packus_epi16(*R, *G); in PackAndStore4444()
129 const __m128i mask_0xf0 = _mm_set1_epi8(0xf0); in PackAndStore4444()
130 const __m128i rb1 = _mm_unpacklo_epi8(rg0, ba0); // rbrbrbrbrb... in PackAndStore4444()
131 const __m128i ga1 = _mm_unpackhi_epi8(rg0, ba0); // gagagagaga... in PackAndStore4444()
132 const __m128i rb2 = _mm_and_si128(rb1, mask_0xf0); in PackAndStore4444()
133 const __m128i ga2 = _mm_srli_epi16(_mm_and_si128(ga1, mask_0xf0), 4); in PackAndStore4444()
134 const __m128i rgba4444 = _mm_or_si128(rb2, ga2); in PackAndStore4444()
135 _mm_storeu_si128((__m128i*)dst, rgba4444); in PackAndStore4444()
139 static WEBP_INLINE void PackAndStore565(const __m128i* const R, in PackAndStore565()
140 const __m128i* const G, in PackAndStore565()
141 const __m128i* const B, in PackAndStore565()
143 const __m128i r0 = _mm_packus_epi16(*R, *R); in PackAndStore565()
144 const __m128i g0 = _mm_packus_epi16(*G, *G); in PackAndStore565()
145 const __m128i b0 = _mm_packus_epi16(*B, *B); in PackAndStore565()
146 const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8(0xf8)); in PackAndStore565()
147 const __m128i b1 = _mm_and_si128(_mm_srli_epi16(b0, 3), _mm_set1_epi8(0x1f)); in PackAndStore565()
148 const __m128i g1 = _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0xe0)), 5); in PackAndStore565()
149 const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3); in PackAndStore565()
150 const __m128i rg = _mm_or_si128(r1, g1); in PackAndStore565()
151 const __m128i gb = _mm_or_si128(g2, b1); in PackAndStore565()
153 const __m128i rgb565 = _mm_unpacklo_epi8(rg, gb); in PackAndStore565()
155 const __m128i rgb565 = _mm_unpacklo_epi8(gb, rg); in PackAndStore565()
157 _mm_storeu_si128((__m128i*)dst, rgb565); in PackAndStore565()
163 static WEBP_INLINE void PlanarTo24b(__m128i* const in0, __m128i* const in1, in PlanarTo24b()
164 __m128i* const in2, __m128i* const in3, in PlanarTo24b()
165 __m128i* const in4, __m128i* const in5, in PlanarTo24b()
181 _mm_storeu_si128((__m128i*)(rgb + 0), *in0); in PlanarTo24b()
182 _mm_storeu_si128((__m128i*)(rgb + 16), *in1); in PlanarTo24b()
183 _mm_storeu_si128((__m128i*)(rgb + 32), *in2); in PlanarTo24b()
184 _mm_storeu_si128((__m128i*)(rgb + 48), *in3); in PlanarTo24b()
185 _mm_storeu_si128((__m128i*)(rgb + 64), *in4); in PlanarTo24b()
186 _mm_storeu_si128((__m128i*)(rgb + 80), *in5); in PlanarTo24b()
191 const __m128i kAlpha = _mm_set1_epi16(255); in VP8YuvToRgba32()
194 __m128i R, G, B; in VP8YuvToRgba32()
202 const __m128i kAlpha = _mm_set1_epi16(255); in VP8YuvToBgra32()
205 __m128i R, G, B; in VP8YuvToBgra32()
213 const __m128i kAlpha = _mm_set1_epi16(255); in VP8YuvToArgb32()
216 __m128i R, G, B; in VP8YuvToArgb32()
224 const __m128i kAlpha = _mm_set1_epi16(255); in VP8YuvToRgba444432()
227 __m128i R, G, B; in VP8YuvToRgba444432()
237 __m128i R, G, B; in VP8YuvToRgb56532()
245 __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3; in VP8YuvToRgb32()
246 __m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5; in VP8YuvToRgb32()
267 __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3; in VP8YuvToBgr32()
268 __m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5; in VP8YuvToBgr32()
292 const __m128i kAlpha = _mm_set1_epi16(255); in YuvToRgbaRow()
295 __m128i R, G, B; in YuvToRgbaRow()
313 const __m128i kAlpha = _mm_set1_epi16(255); in YuvToBgraRow()
316 __m128i R, G, B; in YuvToBgraRow()
334 const __m128i kAlpha = _mm_set1_epi16(255); in YuvToArgbRow()
337 __m128i R, G, B; in YuvToArgbRow()
357 __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3; in YuvToRgbRow()
358 __m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5; in YuvToRgbRow()
393 __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3; in YuvToBgrRow()
394 __m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5; in YuvToBgrRow()
442 #define LOAD_16(src) _mm_loadu_si128((const __m128i*)(src))
444 #define STORE_16(V, dst) _mm_storeu_si128((__m128i*)(dst), (V))
449 const __m128i* const in /*in[6]*/, __m128i* const out /*out[6]*/) { in RGB24PackedToPlanarHelper()
462 __m128i* const out /*out[6]*/) { in RGB24PackedToPlanar()
463 __m128i tmp[6]; in RGB24PackedToPlanar()
464 tmp[0] = _mm_loadu_si128((const __m128i*)(rgb + 0)); in RGB24PackedToPlanar()
465 tmp[1] = _mm_loadu_si128((const __m128i*)(rgb + 16)); in RGB24PackedToPlanar()
466 tmp[2] = _mm_loadu_si128((const __m128i*)(rgb + 32)); in RGB24PackedToPlanar()
467 tmp[3] = _mm_loadu_si128((const __m128i*)(rgb + 48)); in RGB24PackedToPlanar()
468 tmp[4] = _mm_loadu_si128((const __m128i*)(rgb + 64)); in RGB24PackedToPlanar()
469 tmp[5] = _mm_loadu_si128((const __m128i*)(rgb + 80)); in RGB24PackedToPlanar()
480 __m128i* const rgb /*in[6]*/) { in RGB32PackedToPlanar()
481 const __m128i zero = _mm_setzero_si128(); in RGB32PackedToPlanar()
482 __m128i a0 = LOAD_16(argb + 0); in RGB32PackedToPlanar()
483 __m128i a1 = LOAD_16(argb + 4); in RGB32PackedToPlanar()
484 __m128i a2 = LOAD_16(argb + 8); in RGB32PackedToPlanar()
485 __m128i a3 = LOAD_16(argb + 12); in RGB32PackedToPlanar()
500 const __m128i V0_lo = _mm_madd_epi16(RG_LO, MULT_RG); \
501 const __m128i V0_hi = _mm_madd_epi16(RG_HI, MULT_RG); \
502 const __m128i V1_lo = _mm_madd_epi16(GB_LO, MULT_GB); \
503 const __m128i V1_hi = _mm_madd_epi16(GB_HI, MULT_GB); \
504 const __m128i V2_lo = _mm_add_epi32(V0_lo, V1_lo); \
505 const __m128i V2_hi = _mm_add_epi32(V0_hi, V1_hi); \
506 const __m128i V3_lo = _mm_add_epi32(V2_lo, ROUNDER); \
507 const __m128i V3_hi = _mm_add_epi32(V2_hi, ROUNDER); \
508 const __m128i V5_lo = _mm_srai_epi32(V3_lo, DESCALE_FIX); \
509 const __m128i V5_hi = _mm_srai_epi32(V3_hi, DESCALE_FIX); \
514 static WEBP_INLINE void ConvertRGBToY(const __m128i* const R, in ConvertRGBToY()
515 const __m128i* const G, in ConvertRGBToY()
516 const __m128i* const B, in ConvertRGBToY()
517 __m128i* const Y) { in ConvertRGBToY()
518 const __m128i kRG_y = MK_CST_16(16839, 33059 - 16384); in ConvertRGBToY()
519 const __m128i kGB_y = MK_CST_16(16384, 6420); in ConvertRGBToY()
520 const __m128i kHALF_Y = _mm_set1_epi32((16 << YUV_FIX) + YUV_HALF); in ConvertRGBToY()
522 const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G); in ConvertRGBToY()
523 const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G); in ConvertRGBToY()
524 const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B); in ConvertRGBToY()
525 const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B); in ConvertRGBToY()
529 static WEBP_INLINE void ConvertRGBToUV(const __m128i* const R, in ConvertRGBToUV()
530 const __m128i* const G, in ConvertRGBToUV()
531 const __m128i* const B, in ConvertRGBToUV()
532 __m128i* const U, __m128i* const V) { in ConvertRGBToUV()
533 const __m128i kRG_u = MK_CST_16(-9719, -19081); in ConvertRGBToUV()
534 const __m128i kGB_u = MK_CST_16(0, 28800); in ConvertRGBToUV()
535 const __m128i kRG_v = MK_CST_16(28800, 0); in ConvertRGBToUV()
536 const __m128i kGB_v = MK_CST_16(-24116, -4684); in ConvertRGBToUV()
537 const __m128i kHALF_UV = _mm_set1_epi32(((128 << YUV_FIX) + YUV_HALF) << 2); in ConvertRGBToUV()
539 const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G); in ConvertRGBToUV()
540 const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G); in ConvertRGBToUV()
541 const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B); in ConvertRGBToUV()
542 const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B); in ConvertRGBToUV()
556 __m128i rgb_plane[6]; in ConvertRGB24ToY()
562 const __m128i zero = _mm_setzero_si128(); in ConvertRGB24ToY()
563 __m128i r, g, b, Y0, Y1; in ConvertRGB24ToY()
590 __m128i bgr_plane[6]; in ConvertBGR24ToY()
596 const __m128i zero = _mm_setzero_si128(); in ConvertBGR24ToY()
597 __m128i r, g, b, Y0, Y1; in ConvertBGR24ToY()
624 __m128i Y0, Y1, rgb[6]; in ConvertARGBToY()
639 static void HorizontalAddPack(const __m128i* const A, const __m128i* const B, in HorizontalAddPack()
640 __m128i* const out) { in HorizontalAddPack()
641 const __m128i k2 = _mm_set1_epi16(2); in HorizontalAddPack()
642 const __m128i C = _mm_madd_epi16(*A, k2); in HorizontalAddPack()
643 const __m128i D = _mm_madd_epi16(*B, k2); in HorizontalAddPack()
652 __m128i rgb[6], U0, V0, U1, V1; in ConvertARGBToUV()
668 const __m128i prev_u = LOAD_16(u); in ConvertARGBToUV()
669 const __m128i prev_v = LOAD_16(v); in ConvertARGBToUV()
683 __m128i* const r, in RGBA32PackedToPlanar_16b()
684 __m128i* const g, in RGBA32PackedToPlanar_16b()
685 __m128i* const b) { in RGBA32PackedToPlanar_16b()
686 const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x in RGBA32PackedToPlanar_16b()
687 const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x in RGBA32PackedToPlanar_16b()
688 const __m128i in2 = LOAD_16(rgbx + 16); // r4 | ... in RGBA32PackedToPlanar_16b()
689 const __m128i in3 = LOAD_16(rgbx + 24); // r6 | ... in RGBA32PackedToPlanar_16b()
691 const __m128i A0 = _mm_unpacklo_epi16(in0, in1); in RGBA32PackedToPlanar_16b()
692 const __m128i A1 = _mm_unpackhi_epi16(in0, in1); in RGBA32PackedToPlanar_16b()
693 const __m128i A2 = _mm_unpacklo_epi16(in2, in3); in RGBA32PackedToPlanar_16b()
694 const __m128i A3 = _mm_unpackhi_epi16(in2, in3); in RGBA32PackedToPlanar_16b()
695 const __m128i B0 = _mm_unpacklo_epi16(A0, A1); // r0 r1 r2 r3 | g0 g1 .. in RGBA32PackedToPlanar_16b()
696 const __m128i B1 = _mm_unpackhi_epi16(A0, A1); // b0 b1 b2 b3 | x x x x in RGBA32PackedToPlanar_16b()
697 const __m128i B2 = _mm_unpacklo_epi16(A2, A3); // r4 r5 r6 r7 | g4 g5 .. in RGBA32PackedToPlanar_16b()
698 const __m128i B3 = _mm_unpackhi_epi16(A2, A3); // b4 b5 b6 b7 | x x x x in RGBA32PackedToPlanar_16b()
709 __m128i r, g, b, U0, V0, U1, V1; in ConvertRGBA32ToUV()
751 const __m128i zero = _mm_setzero_si128(); in SharpYUVUpdateY_SSE2()
752 const __m128i max = _mm_set1_epi16(MAX_Y); in SharpYUVUpdateY_SSE2()
753 const __m128i one = _mm_set1_epi16(1); in SharpYUVUpdateY_SSE2()
754 __m128i sum = zero; in SharpYUVUpdateY_SSE2()
757 const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); in SharpYUVUpdateY_SSE2()
758 const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); in SharpYUVUpdateY_SSE2()
759 const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); in SharpYUVUpdateY_SSE2()
760 const __m128i D = _mm_sub_epi16(A, B); // diff_y in SharpYUVUpdateY_SSE2()
761 const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0) in SharpYUVUpdateY_SSE2()
762 const __m128i F = _mm_add_epi16(C, D); // new_y in SharpYUVUpdateY_SSE2()
763 const __m128i G = _mm_or_si128(E, one); // -1 or 1 in SharpYUVUpdateY_SSE2()
764 const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero); in SharpYUVUpdateY_SSE2()
765 const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...)) in SharpYUVUpdateY_SSE2()
766 _mm_storeu_si128((__m128i*)(dst + i), H); in SharpYUVUpdateY_SSE2()
769 _mm_storeu_si128((__m128i*)tmp, sum); in SharpYUVUpdateY_SSE2()
784 const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); in SharpYUVUpdateRGB_SSE2()
785 const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); in SharpYUVUpdateRGB_SSE2()
786 const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); in SharpYUVUpdateRGB_SSE2()
787 const __m128i D = _mm_sub_epi16(A, B); // diff_uv in SharpYUVUpdateRGB_SSE2()
788 const __m128i E = _mm_add_epi16(C, D); // new_uv in SharpYUVUpdateRGB_SSE2()
789 _mm_storeu_si128((__m128i*)(dst + i), E); in SharpYUVUpdateRGB_SSE2()
800 const __m128i kCst8 = _mm_set1_epi16(8); in SharpYUVFilterRow_SSE2()
801 const __m128i max = _mm_set1_epi16(MAX_Y); in SharpYUVFilterRow_SSE2()
802 const __m128i zero = _mm_setzero_si128(); in SharpYUVFilterRow_SSE2()
804 const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); in SharpYUVFilterRow_SSE2()
805 const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1)); in SharpYUVFilterRow_SSE2()
806 const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0)); in SharpYUVFilterRow_SSE2()
807 const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1)); in SharpYUVFilterRow_SSE2()
808 const __m128i a0b1 = _mm_add_epi16(a0, b1); in SharpYUVFilterRow_SSE2()
809 const __m128i a1b0 = _mm_add_epi16(a1, b0); in SharpYUVFilterRow_SSE2()
810 const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1 in SharpYUVFilterRow_SSE2()
811 const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8); in SharpYUVFilterRow_SSE2()
812 const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1) in SharpYUVFilterRow_SSE2()
813 const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0) in SharpYUVFilterRow_SSE2()
814 const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3); in SharpYUVFilterRow_SSE2()
815 const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3); in SharpYUVFilterRow_SSE2()
816 const __m128i d0 = _mm_add_epi16(c1, a0); in SharpYUVFilterRow_SSE2()
817 const __m128i d1 = _mm_add_epi16(c0, a1); in SharpYUVFilterRow_SSE2()
818 const __m128i e0 = _mm_srai_epi16(d0, 1); in SharpYUVFilterRow_SSE2()
819 const __m128i e1 = _mm_srai_epi16(d1, 1); in SharpYUVFilterRow_SSE2()
820 const __m128i f0 = _mm_unpacklo_epi16(e0, e1); in SharpYUVFilterRow_SSE2()
821 const __m128i f1 = _mm_unpackhi_epi16(e0, e1); in SharpYUVFilterRow_SSE2()
822 const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); in SharpYUVFilterRow_SSE2()
823 const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8)); in SharpYUVFilterRow_SSE2()
824 const __m128i h0 = _mm_add_epi16(g0, f0); in SharpYUVFilterRow_SSE2()
825 const __m128i h1 = _mm_add_epi16(g1, f1); in SharpYUVFilterRow_SSE2()
826 const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero); in SharpYUVFilterRow_SSE2()
827 const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero); in SharpYUVFilterRow_SSE2()
828 _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0); in SharpYUVFilterRow_SSE2()
829 _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1); in SharpYUVFilterRow_SSE2()