1 /*
2 * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/convert.h"
12
13 #include "conversion_tables.h"
14 #include "libyuv/basic_types.h"
15 #include "libyuv/cpu_id.h"
16 #include "row.h"
17
18 //#define SCALEOPT //Currently for windows only. June 2010
19
20 #ifdef SCALEOPT
21 #include <emmintrin.h>
22 #endif
23
24 namespace libyuv {
25
Clip(int32 val)26 static inline uint8 Clip(int32 val) {
27 if (val < 0) {
28 return (uint8) 0;
29 } else if (val > 255){
30 return (uint8) 255;
31 }
32 return (uint8) val;
33 }
34
I420ToRGB24(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_frame,int dst_stride_frame,int width,int height)35 int I420ToRGB24(const uint8* src_y, int src_stride_y,
36 const uint8* src_u, int src_stride_u,
37 const uint8* src_v, int src_stride_v,
38 uint8* dst_frame, int dst_stride_frame,
39 int width, int height) {
40 if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
41 return -1;
42 }
43
44 // RGB orientation - bottom up
45 // TODO(fbarchard): support inversion
46 uint8* out = dst_frame + dst_stride_frame * height - dst_stride_frame;
47 uint8* out2 = out - dst_stride_frame;
48 int h, w;
49 int tmp_r, tmp_g, tmp_b;
50 const uint8 *y1, *y2 ,*u, *v;
51 y1 = src_y;
52 y2 = y1 + src_stride_y;
53 u = src_u;
54 v = src_v;
55 for (h = ((height + 1) >> 1); h > 0; h--){
56 // 2 rows at a time, 2 y's at a time
57 for (w = 0; w < ((width + 1) >> 1); w++){
58 // Vertical and horizontal sub-sampling
59 tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
60 tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
61 tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
62 out[0] = Clip(tmp_b);
63 out[1] = Clip(tmp_g);
64 out[2] = Clip(tmp_r);
65
66 tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
67 tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
68 tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
69 out[3] = Clip(tmp_b);
70 out[4] = Clip(tmp_g);
71 out[5] = Clip(tmp_r);
72
73 tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
74 tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
75 tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
76 out2[0] = Clip(tmp_b);
77 out2[1] = Clip(tmp_g);
78 out2[2] = Clip(tmp_r);
79
80 tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
81 tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
82 tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
83 out2[3] = Clip(tmp_b);
84 out2[4] = Clip(tmp_g);
85 out2[5] = Clip(tmp_r);
86
87 out += 6;
88 out2 += 6;
89 y1 += 2;
90 y2 += 2;
91 u++;
92 v++;
93 }
94 y1 += src_stride_y + src_stride_y - width;
95 y2 += src_stride_y + src_stride_y - width;
96 u += src_stride_u - ((width + 1) >> 1);
97 v += src_stride_v - ((width + 1) >> 1);
98 out -= dst_stride_frame * 3;
99 out2 -= dst_stride_frame * 3;
100 } // end height for
101 return 0;
102 }
103
104 // Little Endian...
I420ToARGB4444(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_frame,int dst_stride_frame,int width,int height)105 int I420ToARGB4444(const uint8* src_y, int src_stride_y,
106 const uint8* src_u, int src_stride_u,
107 const uint8* src_v, int src_stride_v,
108 uint8* dst_frame, int dst_stride_frame,
109 int width, int height) {
110 if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
111 return -1;
112 }
113
114 // RGB orientation - bottom up
115 uint8* out = dst_frame + dst_stride_frame * (height - 1);
116 uint8* out2 = out - dst_stride_frame;
117 int tmp_r, tmp_g, tmp_b;
118 const uint8 *y1,*y2, *u, *v;
119 y1 = src_y;
120 y2 = y1 + src_stride_y;
121 u = src_u;
122 v = src_v;
123 int h, w;
124
125 for (h = ((height + 1) >> 1); h > 0; h--) {
126 // 2 rows at a time, 2 y's at a time
127 for (w = 0; w < ((width + 1) >> 1); w++) {
128 // Vertical and horizontal sub-sampling
129 // Convert to RGB888 and re-scale to 4 bits
130 tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
131 tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
132 tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
133 out[0] =(uint8)((Clip(tmp_g) & 0xf0) + (Clip(tmp_b) >> 4));
134 out[1] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
135
136 tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
137 tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
138 tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
139 out[2] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
140 out[3] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
141
142 tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
143 tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
144 tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
145 out2[0] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
146 out2[1] = (uint8) (0xf0 + (Clip(tmp_r) >> 4));
147
148 tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
149 tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
150 tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
151 out2[2] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
152 out2[3] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
153
154 out += 4;
155 out2 += 4;
156 y1 += 2;
157 y2 += 2;
158 u++;
159 v++;
160 }
161 y1 += 2 * src_stride_y - width;
162 y2 += 2 * src_stride_y - width;
163 u += src_stride_u - ((width + 1) >> 1);
164 v += src_stride_v - ((width + 1) >> 1);
165 out -= (dst_stride_frame + width) * 2;
166 out2 -= (dst_stride_frame + width) * 2;
167 } // end height for
168 return 0;
169 }
170
171
I420ToRGB565(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_frame,int dst_stride_frame,int width,int height)172 int I420ToRGB565(const uint8* src_y, int src_stride_y,
173 const uint8* src_u, int src_stride_u,
174 const uint8* src_v, int src_stride_v,
175 uint8* dst_frame, int dst_stride_frame,
176 int width, int height) {
177 if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
178 return -1;
179 }
180
181 // Negative height means invert the image.
182 if (height < 0) {
183 height = -height;
184 src_y = src_y + (height - 1) * src_stride_y;
185 src_u = src_u + (height - 1) * src_stride_u;
186 src_v = src_v + (height - 1) * src_stride_v;
187 src_stride_y = -src_stride_y;
188 src_stride_u = -src_stride_u;
189 src_stride_v = -src_stride_v;
190 }
191 uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
192 uint16* out2 = out - dst_stride_frame;
193
194 int tmp_r, tmp_g, tmp_b;
195 const uint8* y1,* y2, * u, * v;
196 y1 = src_y;
197 y2 = y1 + src_stride_y;
198 u = src_u;
199 v = src_v;
200 int h, w;
201
202 for (h = ((height + 1) >> 1); h > 0; h--){
203 // 2 rows at a time, 2 y's at a time
204 for (w = 0; w < ((width + 1) >> 1); w++){
205 // Vertical and horizontal sub-sampling
206 // 1. Convert to RGB888
207 // 2. Shift to adequate location (in the 16 bit word) - RGB 565
208
209 tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
210 tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
211 tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
212 out[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
213 & 0xfc) << 3) + (Clip(tmp_b) >> 3);
214
215 tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
216 tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
217 tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
218 out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
219 & 0xfc) << 3) + (Clip(tmp_b ) >> 3);
220
221 tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
222 tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
223 tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
224 out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
225 & 0xfc) << 3) + (Clip(tmp_b) >> 3);
226
227 tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
228 tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
229 tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
230 out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
231 & 0xfc) << 3) + (Clip(tmp_b) >> 3);
232
233 y1 += 2;
234 y2 += 2;
235 out += 2;
236 out2 += 2;
237 u++;
238 v++;
239 }
240 y1 += 2 * src_stride_y - width;
241 y2 += 2 * src_stride_y - width;
242 u += src_stride_u - ((width + 1) >> 1);
243 v += src_stride_v - ((width + 1) >> 1);
244 out -= 2 * dst_stride_frame + width;
245 out2 -= 2 * dst_stride_frame + width;
246 }
247 return 0;
248 }
249
250
I420ToARGB1555(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_frame,int dst_stride_frame,int width,int height)251 int I420ToARGB1555(const uint8* src_y, int src_stride_y,
252 const uint8* src_u, int src_stride_u,
253 const uint8* src_v, int src_stride_v,
254 uint8* dst_frame, int dst_stride_frame,
255 int width, int height) {
256 if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
257 return -1;
258 }
259 uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
260 uint16* out2 = out - dst_stride_frame ;
261 int32 tmp_r, tmp_g, tmp_b;
262 const uint8 *y1,*y2, *u, *v;
263 int h, w;
264
265 y1 = src_y;
266 y2 = y1 + src_stride_y;
267 u = src_u;
268 v = src_v;
269
270 for (h = ((height + 1) >> 1); h > 0; h--){
271 // 2 rows at a time, 2 y's at a time
272 for (w = 0; w < ((width + 1) >> 1); w++){
273 // Vertical and horizontal sub-sampling
274 // 1. Convert to RGB888
275 // 2. Shift to adequate location (in the 16 bit word) - RGB 555
276 // 3. Add 1 for alpha value
277 tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
278 tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
279 tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
280 out[0] = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
281 ((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
282
283 tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
284 tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
285 tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
286 out[1] = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
287 ((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
288
289 tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
290 tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
291 tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
292 out2[0] = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
293 ((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
294
295 tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
296 tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
297 tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
298 out2[1] = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
299 ((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
300
301 y1 += 2;
302 y2 += 2;
303 out += 2;
304 out2 += 2;
305 u++;
306 v++;
307 }
308 y1 += 2 * src_stride_y - width;
309 y2 += 2 * src_stride_y - width;
310 u += src_stride_u - ((width + 1) >> 1);
311 v += src_stride_v - ((width + 1) >> 1);
312 out -= 2 * dst_stride_frame + width;
313 out2 -= 2 * dst_stride_frame + width;
314 }
315 return 0;
316 }
317
318
I420ToYUY2(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_frame,int dst_stride_frame,int width,int height)319 int I420ToYUY2(const uint8* src_y, int src_stride_y,
320 const uint8* src_u, int src_stride_u,
321 const uint8* src_v, int src_stride_v,
322 uint8* dst_frame, int dst_stride_frame,
323 int width, int height) {
324 if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
325 return -1;
326 }
327
328 const uint8* in1 = src_y;
329 const uint8* in2 = src_y + src_stride_y;
330
331 uint8* out1 = dst_frame;
332 uint8* out2 = dst_frame + dst_stride_frame;
333
334 // YUY2 - Macro-pixel = 2 image pixels
335 // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
336 #ifndef SCALEOPT
337 for (int i = 0; i < ((height + 1) >> 1); i++){
338 for (int j = 0; j < ((width + 1) >> 1); j++){
339 out1[0] = in1[0];
340 out1[1] = *src_u;
341 out1[2] = in1[1];
342 out1[3] = *src_v;
343
344 out2[0] = in2[0];
345 out2[1] = *src_u;
346 out2[2] = in2[1];
347 out2[3] = *src_v;
348 out1 += 4;
349 out2 += 4;
350 src_u++;
351 src_v++;
352 in1 += 2;
353 in2 += 2;
354 }
355 in1 += 2 * src_stride_y - width;
356 in2 += 2 * src_stride_y - width;
357 src_u += src_stride_u - ((width + 1) >> 1);
358 src_v += src_stride_v - ((width + 1) >> 1);
359 out1 += dst_stride_frame + dst_stride_frame - 2 * width;
360 out2 += dst_stride_frame + dst_stride_frame - 2 * width;
361 }
362 #else
363 for (WebRtc_UWord32 i = 0; i < ((height + 1) >> 1);i++) {
364 int32 width__ = (width >> 4);
365 _asm
366 {
367 ;pusha
368 mov eax, DWORD PTR [in1] ;1939.33
369 mov ecx, DWORD PTR [in2] ;1939.33
370 mov ebx, DWORD PTR [src_u] ;1939.33
371 mov edx, DWORD PTR [src_v] ;1939.33
372 loop0:
373 movq xmm6, QWORD PTR [ebx] ;src_u
374 movq xmm0, QWORD PTR [edx] ;src_v
375 punpcklbw xmm6, xmm0 ;src_u, src_v mix
376 ;movdqa xmm1, xmm6
377 ;movdqa xmm2, xmm6
378 ;movdqa xmm4, xmm6
379
380 movdqu xmm3, XMMWORD PTR [eax] ;in1
381 movdqa xmm1, xmm3
382 punpcklbw xmm1, xmm6 ;in1, src_u, in1, src_v
383 mov esi, DWORD PTR [out1]
384 movdqu XMMWORD PTR [esi], xmm1 ;write to out1
385
386 movdqu xmm5, XMMWORD PTR [ecx] ;in2
387 movdqa xmm2, xmm5
388 punpcklbw xmm2, xmm6 ;in2, src_u, in2, src_v
389 mov edi, DWORD PTR [out2]
390 movdqu XMMWORD PTR [edi], xmm2 ;write to out2
391
392 punpckhbw xmm3, xmm6 ;in1, src_u, in1, src_v again
393 movdqu XMMWORD PTR [esi+16], xmm3 ;write to out1 again
394 add esi, 32
395 mov DWORD PTR [out1], esi
396
397 punpckhbw xmm5, xmm6 ;src_u, in2, src_v again
398 movdqu XMMWORD PTR [edi+16], xmm5 ;write to out2 again
399 add edi, 32
400 mov DWORD PTR [out2], edi
401
402 add ebx, 8
403 add edx, 8
404 add eax, 16
405 add ecx, 16
406
407 mov esi, DWORD PTR [width__]
408 sub esi, 1
409 mov DWORD PTR [width__], esi
410 jg loop0
411
412 mov DWORD PTR [in1], eax ;1939.33
413 mov DWORD PTR [in2], ecx ;1939.33
414 mov DWORD PTR [src_u], ebx ;1939.33
415 mov DWORD PTR [src_v], edx ;1939.33
416
417 ;popa
418 emms
419 }
420 in1 += 2 * src_stride_y - width;
421 in2 += 2 * src_stride_y - width;
422 out1 += dst_stride_frame + dst_stride_frame - 2 * width;
423 out2 += dst_stride_frame + dst_stride_frame - 2 * width;
424 }
425 #endif
426 return 0;
427 }
428
I420ToUYVY(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_frame,int dst_stride_frame,int width,int height)429 int I420ToUYVY(const uint8* src_y, int src_stride_y,
430 const uint8* src_u, int src_stride_u,
431 const uint8* src_v, int src_stride_v,
432 uint8* dst_frame, int dst_stride_frame,
433 int width, int height) {
434 if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
435 return -1;
436 }
437
438 int i = 0;
439 const uint8* y1 = src_y;
440 const uint8* y2 = y1 + src_stride_y;
441 const uint8* u = src_u;
442 const uint8* v = src_v;
443
444 uint8* out1 = dst_frame;
445 uint8* out2 = dst_frame + dst_stride_frame;
446
447 // Macro-pixel = 2 image pixels
448 // U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5.....
449
450 #ifndef SCALEOPT
451 for (; i < ((height + 1) >> 1); i++) {
452 for (int j = 0; j < ((width + 1) >> 1); j++) {
453 out1[0] = *u;
454 out1[1] = y1[0];
455 out1[2] = *v;
456 out1[3] = y1[1];
457
458 out2[0] = *u;
459 out2[1] = y2[0];
460 out2[2] = *v;
461 out2[3] = y2[1];
462 out1 += 4;
463 out2 += 4;
464 u++;
465 v++;
466 y1 += 2;
467 y2 += 2;
468 }
469 y1 += 2 * src_stride_y - width;
470 y2 += 2 * src_stride_y - width;
471 u += src_stride_u - ((width + 1) >> 1);
472 v += src_stride_v - ((width + 1) >> 1);
473 out1 += 2 * (dst_stride_frame - width);
474 out2 += 2 * (dst_stride_frame - width);
475 }
476 #else
477 for (; i < (height >> 1);i++) {
478 int32 width__ = (width >> 4);
479 _asm
480 {
481 ;pusha
482 mov eax, DWORD PTR [in1] ;1939.33
483 mov ecx, DWORD PTR [in2] ;1939.33
484 mov ebx, DWORD PTR [src_u] ;1939.33
485 mov edx, DWORD PTR [src_v] ;1939.33
486 loop0:
487 movq xmm6, QWORD PTR [ebx] ;src_u
488 movq xmm0, QWORD PTR [edx] ;src_v
489 punpcklbw xmm6, xmm0 ;src_u, src_v mix
490 movdqa xmm1, xmm6
491 movdqa xmm2, xmm6
492 movdqa xmm4, xmm6
493
494 movdqu xmm3, XMMWORD PTR [eax] ;in1
495 punpcklbw xmm1, xmm3 ;src_u, in1, src_v
496 mov esi, DWORD PTR [out1]
497 movdqu XMMWORD PTR [esi], xmm1 ;write to out1
498
499 movdqu xmm5, XMMWORD PTR [ecx] ;in2
500 punpcklbw xmm2, xmm5 ;src_u, in2, src_v
501 mov edi, DWORD PTR [out2]
502 movdqu XMMWORD PTR [edi], xmm2 ;write to out2
503
504 punpckhbw xmm4, xmm3 ;src_u, in1, src_v again
505 movdqu XMMWORD PTR [esi+16], xmm4 ;write to out1 again
506 add esi, 32
507 mov DWORD PTR [out1], esi
508
509 punpckhbw xmm6, xmm5 ;src_u, in2, src_v again
510 movdqu XMMWORD PTR [edi+16], xmm6 ;write to out2 again
511 add edi, 32
512 mov DWORD PTR [out2], edi
513
514 add ebx, 8
515 add edx, 8
516 add eax, 16
517 add ecx, 16
518
519 mov esi, DWORD PTR [width__]
520 sub esi, 1
521 mov DWORD PTR [width__], esi
522 jg loop0
523
524 mov DWORD PTR [in1], eax ;1939.33
525 mov DWORD PTR [in2], ecx ;1939.33
526 mov DWORD PTR [src_u], ebx ;1939.33
527 mov DWORD PTR [src_v], edx ;1939.33
528
529 ;popa
530 emms
531 }
532 in1 += width;
533 in2 += width;
534 out1 += 2 * (dst_stride_frame - width);
535 out2 += 2 * (dst_stride_frame - width);
536 }
537 #endif
538 return 0;
539 }
540
541
NV12ToRGB565(const uint8 * src_y,int src_stride_y,const uint8 * src_uv,int src_stride_uv,uint8 * dst_frame,int dst_stride_frame,int width,int height)542 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
543 const uint8* src_uv, int src_stride_uv,
544 uint8* dst_frame, int dst_stride_frame,
545 int width, int height) {
546 if (src_y == NULL || src_uv == NULL || dst_frame == NULL) {
547 return -1;
548 }
549
550 // Bi-Planar: Y plane followed by an interlaced U and V plane
551 const uint8* interlacedSrc = src_uv;
552 uint16* out = (uint16*)(src_y) + dst_stride_frame * (height - 1);
553 uint16* out2 = out - dst_stride_frame;
554 int32 tmp_r, tmp_g, tmp_b;
555 const uint8 *y1,*y2;
556 y1 = src_y;
557 y2 = y1 + src_stride_y;
558 int h, w;
559
560 for (h = ((height + 1) >> 1); h > 0; h--) {
561 // 2 rows at a time, 2 y's at a time
562 for (w = 0; w < ((width + 1) >> 1); w++) {
563 // Vertical and horizontal sub-sampling
564 // 1. Convert to RGB888
565 // 2. Shift to adequate location (in the 16 bit word) - RGB 565
566
567 tmp_r = (int32)((mapYc[y1[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
568 tmp_g = (int32)((mapYc[y1[0]] + mapUcg[interlacedSrc[0]]
569 + mapVcg[interlacedSrc[1]] + 128) >> 8);
570 tmp_b = (int32)((mapYc[y1[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
571 out[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
572 & 0xfc) << 3) + (Clip(tmp_b) >> 3);
573
574 tmp_r = (int32)((mapYc[y1[1]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
575 tmp_g = (int32)((mapYc[y1[1]] + mapUcg[interlacedSrc[0]]
576 + mapVcg[interlacedSrc[1]] + 128) >> 8);
577 tmp_b = (int32)((mapYc[y1[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
578 out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
579 & 0xfc) << 3) + (Clip(tmp_b ) >> 3);
580
581 tmp_r = (int32)((mapYc[y2[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
582 tmp_g = (int32)((mapYc[y2[0]] + mapUcg[interlacedSrc[0]]
583 + mapVcg[interlacedSrc[1]] + 128) >> 8);
584 tmp_b = (int32)((mapYc[y2[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
585 out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
586 & 0xfc) << 3) + (Clip(tmp_b) >> 3);
587
588 tmp_r = (int32)((mapYc[y2[1]] + mapVcr[interlacedSrc[1]]
589 + 128) >> 8);
590 tmp_g = (int32)((mapYc[y2[1]] + mapUcg[interlacedSrc[0]]
591 + mapVcg[interlacedSrc[1]] + 128) >> 8);
592 tmp_b = (int32)((mapYc[y2[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
593 out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
594 & 0xfc) << 3) + (Clip(tmp_b) >> 3);
595
596 y1 += 2;
597 y2 += 2;
598 out += 2;
599 out2 += 2;
600 interlacedSrc += 2;
601 }
602 y1 += 2 * src_stride_y - width;
603 y2 += 2 * src_stride_y - width;
604 interlacedSrc += src_stride_uv - ((width + 1) >> 1);
605 out -= 3 * dst_stride_frame + dst_stride_frame - width;
606 out2 -= 3 * dst_stride_frame + dst_stride_frame - width;
607 }
608 return 0;
609 }
610
611 // TODO(fbarchard): Deprecated - this is same as BG24ToARGB with -height
RGB24ToARGB(const uint8 * src_frame,int src_stride_frame,uint8 * dst_frame,int dst_stride_frame,int width,int height)612 int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
613 uint8* dst_frame, int dst_stride_frame,
614 int width, int height) {
615 if (src_frame == NULL || dst_frame == NULL) {
616 return -1;
617 }
618
619 int i, j, offset;
620 uint8* outFrame = dst_frame;
621 const uint8* inFrame = src_frame;
622
623 outFrame += dst_stride_frame * (height - 1) * 4;
624 for (i = 0; i < height; i++) {
625 for (j = 0; j < width; j++) {
626 offset = j * 4;
627 outFrame[0 + offset] = inFrame[0];
628 outFrame[1 + offset] = inFrame[1];
629 outFrame[2 + offset] = inFrame[2];
630 outFrame[3 + offset] = 0xff;
631 inFrame += 3;
632 }
633 outFrame -= 4 * (dst_stride_frame - width);
634 inFrame += src_stride_frame - width;
635 }
636 return 0;
637 }
638
ARGBToI420(const uint8 * src_frame,int src_stride_frame,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)639 int ARGBToI420(const uint8* src_frame, int src_stride_frame,
640 uint8* dst_y, int dst_stride_y,
641 uint8* dst_u, int dst_stride_u,
642 uint8* dst_v, int dst_stride_v,
643 int width, int height) {
644 if (height < 0) {
645 height = -height;
646 src_frame = src_frame + (height - 1) * src_stride_frame;
647 src_stride_frame = -src_stride_frame;
648 }
649 void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
650 void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
651 uint8* dst_u, uint8* dst_v, int width);
652 #if defined(HAS_ARGBTOYROW_SSSE3)
653 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
654 (width % 16 == 0) &&
655 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
656 IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
657 ARGBToYRow = ARGBToYRow_SSSE3;
658 } else
659 #endif
660 {
661 ARGBToYRow = ARGBToYRow_C;
662 }
663 #if defined(HAS_ARGBTOUVROW_SSSE3)
664 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
665 (width % 16 == 0) &&
666 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
667 IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
668 IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
669 ARGBToUVRow = ARGBToUVRow_SSSE3;
670 } else
671 #endif
672 {
673 ARGBToUVRow = ARGBToUVRow_C;
674 }
675
676 for (int y = 0; y < (height - 1); y += 2) {
677 ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
678 ARGBToYRow(src_frame, dst_y, width);
679 ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
680 src_frame += src_stride_frame * 2;
681 dst_y += dst_stride_y * 2;
682 dst_u += dst_stride_u;
683 dst_v += dst_stride_v;
684 }
685 if (height & 1) {
686 ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
687 ARGBToYRow(src_frame, dst_y, width);
688 }
689 return 0;
690 }
691
BGRAToI420(const uint8 * src_frame,int src_stride_frame,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)692 int BGRAToI420(const uint8* src_frame, int src_stride_frame,
693 uint8* dst_y, int dst_stride_y,
694 uint8* dst_u, int dst_stride_u,
695 uint8* dst_v, int dst_stride_v,
696 int width, int height) {
697 if (height < 0) {
698 height = -height;
699 src_frame = src_frame + (height - 1) * src_stride_frame;
700 src_stride_frame = -src_stride_frame;
701 }
702 void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
703 void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
704 uint8* dst_u, uint8* dst_v, int width);
705 #if defined(HAS_BGRATOYROW_SSSE3)
706 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
707 (width % 16 == 0) &&
708 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
709 IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
710 ARGBToYRow = BGRAToYRow_SSSE3;
711 } else
712 #endif
713 {
714 ARGBToYRow = BGRAToYRow_C;
715 }
716 #if defined(HAS_BGRATOUVROW_SSSE3)
717 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
718 (width % 16 == 0) &&
719 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
720 IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
721 IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
722 ARGBToUVRow = BGRAToUVRow_SSSE3;
723 } else
724 #endif
725 {
726 ARGBToUVRow = BGRAToUVRow_C;
727 }
728
729 for (int y = 0; y < (height - 1); y += 2) {
730 ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
731 ARGBToYRow(src_frame, dst_y, width);
732 ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
733 src_frame += src_stride_frame * 2;
734 dst_y += dst_stride_y * 2;
735 dst_u += dst_stride_u;
736 dst_v += dst_stride_v;
737 }
738 if (height & 1) {
739 ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
740 ARGBToYRow(src_frame, dst_y, width);
741 }
742 return 0;
743 }
744
ABGRToI420(const uint8 * src_frame,int src_stride_frame,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)745 int ABGRToI420(const uint8* src_frame, int src_stride_frame,
746 uint8* dst_y, int dst_stride_y,
747 uint8* dst_u, int dst_stride_u,
748 uint8* dst_v, int dst_stride_v,
749 int width, int height) {
750 if (height < 0) {
751 height = -height;
752 src_frame = src_frame + (height - 1) * src_stride_frame;
753 src_stride_frame = -src_stride_frame;
754 }
755 void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
756 void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
757 uint8* dst_u, uint8* dst_v, int width);
758 #if defined(HAS_ABGRTOYROW_SSSE3)
759 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
760 (width % 16 == 0) &&
761 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
762 IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
763 ARGBToYRow = ABGRToYRow_SSSE3;
764 } else
765 #endif
766 {
767 ARGBToYRow = ABGRToYRow_C;
768 }
769 #if defined(HAS_ABGRTOUVROW_SSSE3)
770 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
771 (width % 16 == 0) &&
772 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
773 IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
774 IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
775 ARGBToUVRow = ABGRToUVRow_SSSE3;
776 } else
777 #endif
778 {
779 ARGBToUVRow = ABGRToUVRow_C;
780 }
781
782 for (int y = 0; y < (height - 1); y += 2) {
783 ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
784 ARGBToYRow(src_frame, dst_y, width);
785 ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
786 src_frame += src_stride_frame * 2;
787 dst_y += dst_stride_y * 2;
788 dst_u += dst_stride_u;
789 dst_v += dst_stride_v;
790 }
791 if (height & 1) {
792 ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
793 ARGBToYRow(src_frame, dst_y, width);
794 }
795 return 0;
796 }
797
RGB24ToI420(const uint8 * src_frame,int src_stride_frame,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)798 int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
799 uint8* dst_y, int dst_stride_y,
800 uint8* dst_u, int dst_stride_u,
801 uint8* dst_v, int dst_stride_v,
802 int width, int height) {
803 if (height < 0) {
804 height = -height;
805 src_frame = src_frame + (height - 1) * src_stride_frame;
806 src_stride_frame = -src_stride_frame;
807 }
808 void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
809 void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
810 uint8* dst_u, uint8* dst_v, int width);
811 #if defined(HAS_RGB24TOYROW_SSSE3)
812 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
813 (width % 16 == 0) &&
814 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
815 IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
816 ARGBToYRow = RGB24ToYRow_SSSE3;
817 } else
818 #endif
819 {
820 ARGBToYRow = RGB24ToYRow_C;
821 }
822 #if defined(HAS_RGB24TOUVROW_SSSE3)
823 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
824 (width % 16 == 0) &&
825 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
826 IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
827 IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
828 ARGBToUVRow = RGB24ToUVRow_SSSE3;
829 } else
830 #endif
831 {
832 ARGBToUVRow = RGB24ToUVRow_C;
833 }
834
835 for (int y = 0; y < (height - 1); y += 2) {
836 ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
837 ARGBToYRow(src_frame, dst_y, width);
838 ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
839 src_frame += src_stride_frame * 2;
840 dst_y += dst_stride_y * 2;
841 dst_u += dst_stride_u;
842 dst_v += dst_stride_v;
843 }
844 if (height & 1) {
845 ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
846 ARGBToYRow(src_frame, dst_y, width);
847 }
848 return 0;
849 }
850
RAWToI420(const uint8 * src_frame,int src_stride_frame,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)851 int RAWToI420(const uint8* src_frame, int src_stride_frame,
852 uint8* dst_y, int dst_stride_y,
853 uint8* dst_u, int dst_stride_u,
854 uint8* dst_v, int dst_stride_v,
855 int width, int height) {
856 if (height < 0) {
857 height = -height;
858 src_frame = src_frame + (height - 1) * src_stride_frame;
859 src_stride_frame = -src_stride_frame;
860 }
861 void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
862 void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
863 uint8* dst_u, uint8* dst_v, int width);
864 #if defined(HAS_RAWTOYROW_SSSE3)
865 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
866 (width % 16 == 0) &&
867 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
868 IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
869 ARGBToYRow = RAWToYRow_SSSE3;
870 } else
871 #endif
872 {
873 ARGBToYRow = RAWToYRow_C;
874 }
875 #if defined(HAS_RAWTOUVROW_SSSE3)
876 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
877 (width % 16 == 0) &&
878 IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
879 IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
880 IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
881 ARGBToUVRow = RAWToUVRow_SSSE3;
882 } else
883 #endif
884 {
885 ARGBToUVRow = RAWToUVRow_C;
886 }
887
888 for (int y = 0; y < (height - 1); y += 2) {
889 ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
890 ARGBToYRow(src_frame, dst_y, width);
891 ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
892 src_frame += src_stride_frame * 2;
893 dst_y += dst_stride_y * 2;
894 dst_u += dst_stride_u;
895 dst_v += dst_stride_v;
896 }
897 if (height & 1) {
898 ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
899 ARGBToYRow(src_frame, dst_y, width);
900 }
901 return 0;
902 }
903
904 } // namespace libyuv
905