• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
#include "build/build_config.h"
#include "media/base/simd/convert_rgb_to_yuv.h"
#include "media/base/simd/yuv_to_rgb_table.h"

#include <string.h>

#if defined(COMPILER_MSVC)
#include <intrin.h>
#else
#include <mmintrin.h>
#include <emmintrin.h>
#endif
15 
16 namespace media {
17 
18 #define FIX_SHIFT 12
19 #define FIX(x) ((x) * (1 << FIX_SHIFT))
20 
21 // Define a convenient macro to do static cast.
22 #define INT16_FIX(x) static_cast<int16>(FIX(x))
23 
24 // Android's pixel layout is RGBA, while other platforms
25 // are BGRA.
26 #if defined(OS_ANDROID)
27 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = {
28   INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,
29   INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,
30   -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,
31   -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,
32   INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,
33   INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,
34 };
35 #else
36 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = {
37   INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,
38   INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,
39   INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,
40   INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,
41   -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,
42   -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,
43 };
44 #endif
45 
46 #undef INT16_FIX
47 
48 // This is the final offset for the conversion from signed yuv values to
49 // unsigned values. It is arranged so that offset of 16 is applied to Y
50 // components and 128 is added to UV components for 2 pixels.
51 SIMD_ALIGNED(const int32 kYOffset[4]) = {16, 16, 16, 16};
52 
// Saturates |value| to the byte range [0, 255].
static inline int Clamp(int value) {
  return value < 0 ? 0 : (value > 255 ? 255 : value);
}
60 
RGBToY(int r,int g,int b)61 static inline int RGBToY(int r, int g, int b) {
62   int y = ConvertRGBAToYUV_kTable[0] * b +
63       ConvertRGBAToYUV_kTable[1] * g +
64       ConvertRGBAToYUV_kTable[2] * r;
65   y >>= FIX_SHIFT;
66   return Clamp(y + 16);
67 }
68 
RGBToU(int r,int g,int b,int shift)69 static inline int RGBToU(int r, int g, int b, int shift) {
70   int u = ConvertRGBAToYUV_kTable[8] * b +
71       ConvertRGBAToYUV_kTable[9] * g +
72       ConvertRGBAToYUV_kTable[10] * r;
73   u >>= FIX_SHIFT + shift;
74   return Clamp(u + 128);
75 }
76 
RGBToV(int r,int g,int b,int shift)77 static inline int RGBToV(int r, int g, int b, int shift) {
78   int v = ConvertRGBAToYUV_kTable[16] * b +
79       ConvertRGBAToYUV_kTable[17] * g +
80       ConvertRGBAToYUV_kTable[18] * r;
81   v >>= FIX_SHIFT + shift;
82   return Clamp(v + 128);
83 }
84 
// Converts one 4-byte pixel to a Y byte.
//
// Reads the first three bytes at |rgb_buf| into the caller's locals b, g and
// r (in that order) and skips the fourth byte, adds the three values to the
// caller's sum_b, sum_g and sum_r, and stores RGBToY(r, g, b) through
// |y_buf|. Both arguments must be modifiable pointer lvalues: |rgb_buf| is
// advanced by 4 and |y_buf| by 1.
//
// The body is wrapped in do { } while (0) so the macro expands to a single
// statement and stays correct when used under an unbraced if/else.
#define CONVERT_Y(rgb_buf, y_buf)  \
  do {                             \
    b = *rgb_buf++;                \
    g = *rgb_buf++;                \
    r = *rgb_buf++;                \
    ++rgb_buf;                     \
    sum_b += b;                    \
    sum_g += g;                    \
    sum_r += r;                    \
    *y_buf++ = RGBToY(r, g, b);    \
  } while (0)
94 
ConvertRGBToYUV_V2H2(const uint8 * rgb_buf_1,const uint8 * rgb_buf_2,uint8 * y_buf_1,uint8 * y_buf_2,uint8 * u_buf,uint8 * v_buf)95 static inline void ConvertRGBToYUV_V2H2(const uint8* rgb_buf_1,
96                                         const uint8* rgb_buf_2,
97                                         uint8* y_buf_1,
98                                         uint8* y_buf_2,
99                                         uint8* u_buf,
100                                         uint8* v_buf) {
101   int sum_b = 0;
102   int sum_g = 0;
103   int sum_r = 0;
104   int r, g, b;
105 
106 
107 
108   CONVERT_Y(rgb_buf_1, y_buf_1);
109   CONVERT_Y(rgb_buf_1, y_buf_1);
110   CONVERT_Y(rgb_buf_2, y_buf_2);
111   CONVERT_Y(rgb_buf_2, y_buf_2);
112   *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2);
113   *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2);
114 }
115 
ConvertRGBToYUV_V2H1(const uint8 * rgb_buf_1,const uint8 * rgb_buf_2,uint8 * y_buf_1,uint8 * y_buf_2,uint8 * u_buf,uint8 * v_buf)116 static inline void ConvertRGBToYUV_V2H1(const uint8* rgb_buf_1,
117                                         const uint8* rgb_buf_2,
118                                         uint8* y_buf_1,
119                                         uint8* y_buf_2,
120                                         uint8* u_buf,
121                                         uint8* v_buf) {
122   int sum_b = 0;
123   int sum_g = 0;
124   int sum_r = 0;
125   int r, g, b;
126 
127   CONVERT_Y(rgb_buf_1, y_buf_1);
128   CONVERT_Y(rgb_buf_2, y_buf_2);
129   *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);
130   *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);
131 }
132 
ConvertRGBToYUV_V1H2(const uint8 * rgb_buf,uint8 * y_buf,uint8 * u_buf,uint8 * v_buf)133 static inline void ConvertRGBToYUV_V1H2(const uint8* rgb_buf,
134                                        uint8* y_buf,
135                                        uint8* u_buf,
136                                        uint8* v_buf) {
137   int sum_b = 0;
138   int sum_g = 0;
139   int sum_r = 0;
140   int r, g, b;
141 
142   CONVERT_Y(rgb_buf, y_buf);
143   CONVERT_Y(rgb_buf, y_buf);
144   *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);
145   *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);
146 }
147 
ConvertRGBToYUV_V1H1(const uint8 * rgb_buf,uint8 * y_buf,uint8 * u_buf,uint8 * v_buf)148 static inline void ConvertRGBToYUV_V1H1(const uint8* rgb_buf,
149                                        uint8* y_buf,
150                                        uint8* u_buf,
151                                        uint8* v_buf) {
152   int sum_b = 0;
153   int sum_g = 0;
154   int sum_r = 0;
155   int r, g, b;
156 
157   CONVERT_Y(rgb_buf, y_buf);
158   *u_buf++ = RGBToU(r, g, b, 0);
159   *v_buf++ = RGBToV(r, g, b, 0);
160 }
161 
ConvertRGB32ToYUVRow_SSE2(const uint8 * rgb_buf_1,const uint8 * rgb_buf_2,uint8 * y_buf_1,uint8 * y_buf_2,uint8 * u_buf,uint8 * v_buf,int width)162 static void ConvertRGB32ToYUVRow_SSE2(const uint8* rgb_buf_1,
163                                       const uint8* rgb_buf_2,
164                                       uint8* y_buf_1,
165                                       uint8* y_buf_2,
166                                       uint8* u_buf,
167                                       uint8* v_buf,
168                                       int width) {
169   while (width >= 4) {
170     // Name for the Y pixels:
171     // Row 1: a b c d
172     // Row 2: e f g h
173     //
174     // First row 4 pixels.
175     __m128i rgb_row_1 = _mm_loadu_si128(
176         reinterpret_cast<const __m128i*>(rgb_buf_1));
177     __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1);
178 
179     __m128i y_table = _mm_load_si128(
180         reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable));
181 
182     __m128i rgb_a_b = _mm_unpackhi_epi8(rgb_row_1, zero_1);
183     rgb_a_b = _mm_madd_epi16(rgb_a_b, y_table);
184 
185     __m128i rgb_c_d = _mm_unpacklo_epi8(rgb_row_1, zero_1);
186     rgb_c_d = _mm_madd_epi16(rgb_c_d, y_table);
187 
188     // Do a crazh shuffle so that we get:
189     //  v------------ Multiply Add
190     // BG: a b c d
191     // A0: a b c d
192     __m128i bg_abcd = _mm_castps_si128(
193         _mm_shuffle_ps(
194             _mm_castsi128_ps(rgb_c_d),
195             _mm_castsi128_ps(rgb_a_b),
196             (3 << 6) | (1 << 4) | (3 << 2) | 1));
197     __m128i r_abcd = _mm_castps_si128(
198         _mm_shuffle_ps(
199             _mm_castsi128_ps(rgb_c_d),
200             _mm_castsi128_ps(rgb_a_b),
201             (2 << 6) | (2 << 2)));
202     __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd);
203 
204     // Down shift back to 8bits range.
205     __m128i y_offset = _mm_load_si128(
206         reinterpret_cast<const __m128i*>(kYOffset));
207     y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT);
208     y_abcd = _mm_add_epi32(y_abcd, y_offset);
209     y_abcd = _mm_packs_epi32(y_abcd, y_abcd);
210     y_abcd = _mm_packus_epi16(y_abcd, y_abcd);
211     *reinterpret_cast<uint32*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd);
212     y_buf_1 += 4;
213 
214     // Second row 4 pixels.
215     __m128i rgb_row_2 = _mm_loadu_si128(
216         reinterpret_cast<const __m128i*>(rgb_buf_2));
217     __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2);
218     __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2);
219     __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2);
220 
221     // Add two rows together.
222     __m128i rgb_ae_bf =
223         _mm_add_epi16(_mm_unpackhi_epi8(rgb_row_1, zero_2), rgb_e_f);
224     __m128i rgb_cg_dh =
225         _mm_add_epi16(_mm_unpacklo_epi8(rgb_row_1, zero_2), rgb_g_h);
226 
227     // Multiply add like the previous row.
228     rgb_e_f = _mm_madd_epi16(rgb_e_f, y_table);
229     rgb_g_h = _mm_madd_epi16(rgb_g_h, y_table);
230 
231     __m128i bg_efgh = _mm_castps_si128(
232         _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),
233                        _mm_castsi128_ps(rgb_e_f),
234                        (3 << 6) | (1 << 4) | (3 << 2) | 1));
235     __m128i r_efgh = _mm_castps_si128(
236         _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),
237                        _mm_castsi128_ps(rgb_e_f),
238                        (2 << 6) | (2 << 2)));
239     __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh);
240     y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT);
241     y_efgh = _mm_add_epi32(y_efgh, y_offset);
242     y_efgh = _mm_packs_epi32(y_efgh, y_efgh);
243     y_efgh = _mm_packus_epi16(y_efgh, y_efgh);
244     *reinterpret_cast<uint32*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh);
245     y_buf_2 += 4;
246 
247     __m128i rgb_ae_cg = _mm_castps_si128(
248         _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),
249                        _mm_castsi128_ps(rgb_ae_bf),
250                        (3 << 6) | (2 << 4) | (3 << 2) | 2));
251     __m128i rgb_bf_dh = _mm_castps_si128(
252         _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),
253                        _mm_castsi128_ps(rgb_ae_bf),
254                        (1 << 6) | (1 << 2)));
255 
256     // This is a 2x2 subsampling for 2 pixels.
257     __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh);
258 
259     // Do a multiply add with U table.
260     __m128i u_a_b = _mm_madd_epi16(
261         rgb_abef_cdgh,
262         _mm_load_si128(
263             reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8)));
264     u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)),
265                           _mm_shuffle_epi32(u_a_b, (2 << 2)));
266     // Right shift 14 because of 12 from fixed point and 2 from subsampling.
267     u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2);
268     __m128i uv_offset = _mm_slli_epi32(y_offset, 3);
269     u_a_b = _mm_add_epi32(u_a_b, uv_offset);
270     u_a_b = _mm_packs_epi32(u_a_b, u_a_b);
271     u_a_b = _mm_packus_epi16(u_a_b, u_a_b);
272     *reinterpret_cast<uint16*>(u_buf) = _mm_extract_epi16(u_a_b, 0);
273     u_buf += 2;
274 
275     __m128i v_a_b = _mm_madd_epi16(
276         rgb_abef_cdgh,
277         _mm_load_si128(
278             reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16)));
279     v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)),
280                           _mm_shuffle_epi32(v_a_b, (2 << 2)));
281     v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2);
282     v_a_b = _mm_add_epi32(v_a_b, uv_offset);
283     v_a_b = _mm_packs_epi32(v_a_b, v_a_b);
284     v_a_b = _mm_packus_epi16(v_a_b, v_a_b);
285     *reinterpret_cast<uint16*>(v_buf) = _mm_extract_epi16(v_a_b, 0);
286     v_buf += 2;
287 
288     rgb_buf_1 += 16;
289     rgb_buf_2 += 16;
290 
291     // Move forward by 4 pixels.
292     width -= 4;
293   }
294 
295   // Just use C code to convert the remaining pixels.
296   if (width >= 2) {
297     ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);
298     rgb_buf_1 += 8;
299     rgb_buf_2 += 8;
300     y_buf_1 += 2;
301     y_buf_2 += 2;
302     ++u_buf;
303     ++v_buf;
304     width -= 2;
305   }
306 
307   if (width)
308     ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);
309 }
310 
ConvertRGB32ToYUV_SSE2(const uint8 * rgbframe,uint8 * yplane,uint8 * uplane,uint8 * vplane,int width,int height,int rgbstride,int ystride,int uvstride)311 extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe,
312                                    uint8* yplane,
313                                    uint8* uplane,
314                                    uint8* vplane,
315                                    int width,
316                                    int height,
317                                    int rgbstride,
318                                    int ystride,
319                                    int uvstride) {
320   while (height >= 2) {
321     ConvertRGB32ToYUVRow_SSE2(rgbframe,
322                               rgbframe + rgbstride,
323                               yplane,
324                               yplane + ystride,
325                               uplane,
326                               vplane,
327                               width);
328     rgbframe += 2 * rgbstride;
329     yplane += 2 * ystride;
330     uplane += uvstride;
331     vplane += uvstride;
332     height -= 2;
333   }
334 
335   if (!height)
336     return;
337 
338   // Handle the last row.
339   while (width >= 2) {
340     ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane);
341     rgbframe += 8;
342     yplane += 2;
343     ++uplane;
344     ++vplane;
345     width -= 2;
346   }
347 
348   if (width)
349     ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);
350 }
351 
ConvertRGB32ToYUV_SSE2_Reference(const uint8 * rgbframe,uint8 * yplane,uint8 * uplane,uint8 * vplane,int width,int height,int rgbstride,int ystride,int uvstride)352 void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe,
353                                       uint8* yplane,
354                                       uint8* uplane,
355                                       uint8* vplane,
356                                       int width,
357                                       int height,
358                                       int rgbstride,
359                                       int ystride,
360                                       int uvstride) {
361   while (height >= 2) {
362     int i = 0;
363 
364     // Convert a 2x2 block.
365     while (i + 2 <= width) {
366       ConvertRGBToYUV_V2H2(rgbframe + i * 4,
367                            rgbframe + rgbstride + i * 4,
368                            yplane + i,
369                            yplane + ystride + i,
370                            uplane + i / 2,
371                            vplane + i / 2);
372       i += 2;
373     }
374 
375     // Convert the last pixel of two rows.
376     if (i < width) {
377       ConvertRGBToYUV_V2H1(rgbframe + i * 4,
378                            rgbframe + rgbstride + i * 4,
379                            yplane + i,
380                            yplane + ystride + i,
381                            uplane + i / 2,
382                            vplane + i / 2);
383     }
384 
385     rgbframe += 2 * rgbstride;
386     yplane += 2 * ystride;
387     uplane += uvstride;
388     vplane += uvstride;
389     height -= 2;
390   }
391 
392   if (!height)
393     return;
394 
395   // Handle the last row.
396   while (width >= 2) {
397     ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane);
398     rgbframe += 8;
399     yplane += 2;
400     ++uplane;
401     ++vplane;
402     width -= 2;
403   }
404 
405   // Handle the last pixel in the last row.
406   if (width)
407     ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);
408 }
409 
410 }  // namespace media
411