1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevcd_fmt_conv.c
22 *
23 * @brief
24 * Contains functions for format conversion or frame copy of output buffer
25 *
26 * @author
27 * Harish
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 * None
33 *
34 *******************************************************************************
35 */
36 /*****************************************************************************/
37 /* File Includes */
38 /*****************************************************************************/
39 #include <stdio.h>
40 #include <stddef.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <assert.h>
44
45 #include "ihevc_typedefs.h"
46 #include "iv.h"
47 #include "ivd.h"
48 #include "ihevcd_cxa.h"
49 #include "ithread.h"
50
51 #include "ihevc_defs.h"
52 #include "ihevc_debug.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 #include "ihevc_cabac_tables.h"
57 #include "ihevc_disp_mgr.h"
58
59 #include "ihevcd_defs.h"
60 #include "ihevcd_function_selector.h"
61 #include "ihevcd_structs.h"
62 #include "ihevcd_error.h"
63 #include "ihevcd_nal.h"
64 #include "ihevcd_bitstream.h"
65 #include "ihevcd_fmt_conv.h"
66 #include "ihevcd_profile.h"
67
68 /* SIMD variants of format conversion modules do not support width less than 32 */
69 #define MIN_FMT_CONV_SIMD_WIDTH 32
/**
*******************************************************************************
*
* @brief Function used for converting a 420SP buffer to RGB565
*
* @par Description
* Converts a 420SP buffer (Y plane plus interleaved chroma plane) to RGB565.
* Two rows are processed at a time, and each chroma pair is shared by a 2x2
* block of luma samples
*
* @param[in] pu1_y_src
* Input Y pointer
*
* @param[in] pu1_uv_src
* Input UV pointer (UV is interleaved either in UV or VU format)
*
* @param[out] pu2_rgb_dst
* Output RGB565 pointer
*
* @param[in] wd
* Width
*
* @param[in] ht
* Height
*
* @param[in] src_y_strd
* Input Y Stride
*
* @param[in] src_uv_strd
* Input UV stride
*
* @param[in] dst_strd
* Output stride (in units of 16-bit pixels)
*
* @param[in] is_u_first
* Flag to indicate if U is the first byte in input chroma part
*
* @returns None
*
* @remarks In case there is a need to perform partial frame conversion then
* by passing appropriate source and destination pointers and appropriate
* values for wd and ht it can be done
*
*******************************************************************************
*/
/*
 * Converts a 4:2:0 semi-planar picture (Y plane + interleaved chroma plane)
 * to RGB565.
 *
 *   pu1_y_src    luma plane, src_y_strd bytes between rows
 *   pu1_uv_src   interleaved chroma plane, src_uv_strd bytes between rows
 *   pu2_rgb_dst  output pixels, dst_strd 16-bit elements between rows
 *   wd, ht       picture size in luma samples
 *   is_u_first   non-zero: chroma pairs are stored U,V; zero: stored V,U
 *
 * Work is done on 2x2 luma blocks that share one chroma pair, so
 * (wd >> 1) * 2 columns and (ht >> 1) * 2 rows are written; a trailing odd
 * column or row is left untouched. Each output word is packed as
 * R in bits 15..11, G in bits 10..5 and B in bits 4..0.
 */
void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
                                     UWORD8 *pu1_uv_src,
                                     UWORD16 *pu2_rgb_dst,
                                     WORD32 wd,
                                     WORD32 ht,
                                     WORD32 src_y_strd,
                                     WORD32 src_uv_strd,
                                     WORD32 dst_strd,
                                     WORD32 is_u_first)
{
    WORD16 i2_r, i2_g, i2_b;
    UWORD32 u4_r, u4_g, u4_b;
    /* Counters are as wide as the dimensions they are compared against */
    WORD32 row, col, k;
    WORD32 num_row_pairs = ht >> 1;
    WORD32 num_col_pairs = wd >> 1;
    UWORD8 *pu1_u_row, *pu1_v_row;
    UWORD8 *pu1_y_top, *pu1_y_bot;
    UWORD16 *pu2_dst_top, *pu2_dst_bot;

    /* Locate U and V inside each interleaved chroma pair */
    if(is_u_first)
    {
        pu1_u_row = pu1_uv_src;
        pu1_v_row = pu1_uv_src + 1;
    }
    else
    {
        pu1_u_row = pu1_uv_src + 1;
        pu1_v_row = pu1_uv_src;
    }

    pu1_y_top = pu1_y_src;
    pu1_y_bot = pu1_y_src + src_y_strd;
    pu2_dst_top = pu2_rgb_dst;
    pu2_dst_bot = pu2_rgb_dst + dst_strd;

    for(row = 0; row < num_row_pairs; row++)
    {
        for(col = 0; col < num_col_pairs; col++)
        {
            /* Column of the left pixel of this 2x2 block; also the byte */
            /* offset of this block's chroma pair within the chroma row  */
            WORD32 x = col * 2;

            /* Chroma contribution, shared by all four pixels of the block */
            i2_b = ((pu1_u_row[x] - 128) * COEFF4 >> 13);
            i2_g = ((pu1_u_row[x] - 128) * COEFF2 + (pu1_v_row[x] - 128) * COEFF3) >> 13;
            i2_r = ((pu1_v_row[x] - 128) * COEFF1) >> 13;

            /* Top row: pixels 0 and 1 */
            for(k = 0; k < 2; k++)
            {
                u4_b = CLIP_U8(pu1_y_top[x + k] + i2_b);
                u4_b >>= 3;
                u4_g = CLIP_U8(pu1_y_top[x + k] + i2_g);
                u4_g >>= 2;
                u4_r = CLIP_U8(pu1_y_top[x + k] + i2_r);
                u4_r >>= 3;

                pu2_dst_top[x + k] = (UWORD16)((u4_r << 11) | (u4_g << 5) | u4_b);
            }

            /* Bottom row: pixels 2 and 3 */
            for(k = 0; k < 2; k++)
            {
                u4_b = CLIP_U8(pu1_y_bot[x + k] + i2_b);
                u4_b >>= 3;
                u4_g = CLIP_U8(pu1_y_bot[x + k] + i2_g);
                u4_g >>= 2;
                u4_r = CLIP_U8(pu1_y_bot[x + k] + i2_r);
                u4_r >>= 3;

                pu2_dst_bot[x + k] = (UWORD16)((u4_r << 11) | (u4_g << 5) | u4_b);
            }
        }

        /* Advance by whole strides rather than rewinding by wd, so that an */
        /* odd wd cannot skew the row pointers of the following rows        */
        pu1_u_row += src_uv_strd;
        pu1_v_row += src_uv_strd;

        pu1_y_top += 2 * src_y_strd;
        pu1_y_bot += 2 * src_y_strd;

        pu2_dst_top += 2 * dst_strd;
        pu2_dst_bot += 2 * dst_strd;
    }
}
230
/*
 * Converts a 4:2:0 semi-planar picture (Y plane + interleaved chroma plane)
 * to 32-bit RGB pixels.
 *
 *   pu1_y_src     luma plane, src_y_strd bytes between rows
 *   pu1_uv_src    interleaved chroma plane, src_uv_strd bytes between rows
 *   pu4_rgba_dst  output pixels, dst_strd 32-bit elements between rows
 *   wd, ht        picture size in luma samples
 *   is_u_first    non-zero: chroma pairs are stored U,V; zero: stored V,U
 *
 * Work is done on 2x2 luma blocks that share one chroma pair, so
 * (wd >> 1) * 2 columns and (ht >> 1) * 2 rows are written; a trailing odd
 * column or row is left untouched. Each output word holds R in bits 23..16,
 * G in bits 15..8 and B in bits 7..0; bits 31..24 are written as zero.
 */
void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
                                       UWORD8 *pu1_uv_src,
                                       UWORD32 *pu4_rgba_dst,
                                       WORD32 wd,
                                       WORD32 ht,
                                       WORD32 src_y_strd,
                                       WORD32 src_uv_strd,
                                       WORD32 dst_strd,
                                       WORD32 is_u_first)
{
    WORD16 i2_r, i2_g, i2_b;
    UWORD32 u4_r, u4_g, u4_b;
    /* Counters are as wide as the dimensions they are compared against */
    WORD32 row, col, k;
    WORD32 num_row_pairs = ht >> 1;
    WORD32 num_col_pairs = wd >> 1;
    UWORD8 *pu1_u_row, *pu1_v_row;
    UWORD8 *pu1_y_top, *pu1_y_bot;
    UWORD32 *pu4_dst_top, *pu4_dst_bot;

    /* Locate U and V inside each interleaved chroma pair */
    if(is_u_first)
    {
        pu1_u_row = pu1_uv_src;
        pu1_v_row = pu1_uv_src + 1;
    }
    else
    {
        pu1_u_row = pu1_uv_src + 1;
        pu1_v_row = pu1_uv_src;
    }

    pu1_y_top = pu1_y_src;
    pu1_y_bot = pu1_y_src + src_y_strd;
    pu4_dst_top = pu4_rgba_dst;
    pu4_dst_bot = pu4_rgba_dst + dst_strd;

    for(row = 0; row < num_row_pairs; row++)
    {
        for(col = 0; col < num_col_pairs; col++)
        {
            /* Column of the left pixel of this 2x2 block; also the byte */
            /* offset of this block's chroma pair within the chroma row  */
            WORD32 x = col * 2;

            /* Chroma contribution, shared by all four pixels of the block */
            i2_b = ((pu1_u_row[x] - 128) * COEFF4 >> 13);
            i2_g = ((pu1_u_row[x] - 128) * COEFF2 + (pu1_v_row[x] - 128) * COEFF3) >> 13;
            i2_r = ((pu1_v_row[x] - 128) * COEFF1) >> 13;

            /* Top row: pixels 0 and 1 */
            for(k = 0; k < 2; k++)
            {
                u4_b = CLIP_U8(pu1_y_top[x + k] + i2_b);
                u4_g = CLIP_U8(pu1_y_top[x + k] + i2_g);
                u4_r = CLIP_U8(pu1_y_top[x + k] + i2_r);

                pu4_dst_top[x + k] = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
            }

            /* Bottom row: pixels 2 and 3 */
            for(k = 0; k < 2; k++)
            {
                u4_b = CLIP_U8(pu1_y_bot[x + k] + i2_b);
                u4_g = CLIP_U8(pu1_y_bot[x + k] + i2_g);
                u4_r = CLIP_U8(pu1_y_bot[x + k] + i2_r);

                pu4_dst_bot[x + k] = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
            }
        }

        /* Advance by whole strides rather than rewinding by wd, so that an */
        /* odd wd cannot skew the row pointers of the following rows        */
        pu1_u_row += src_uv_strd;
        pu1_v_row += src_uv_strd;

        pu1_y_top += 2 * src_y_strd;
        pu1_y_bot += 2 * src_y_strd;

        pu4_dst_top += 2 * dst_strd;
        pu4_dst_bot += 2 * dst_strd;
    }
}
333
/**
*******************************************************************************
*
* @brief Function used for copying a 420SP buffer
*
* @par Description
* Function used for copying a 420SP buffer
*
* @param[in] pu1_y_src
* Input Y pointer
*
* @param[in] pu1_uv_src
* Input UV pointer (UV is interleaved either in UV or VU format)
*
* @param[out] pu1_y_dst
* Output Y pointer
*
* @param[out] pu1_uv_dst
* Output UV pointer (UV is interleaved in the same format as that of input)
*
* @param[in] wd
* Width
*
* @param[in] ht
* Height
*
* @param[in] src_y_strd
* Input Y Stride
*
* @param[in] src_uv_strd
* Input UV stride
*
* @param[in] dst_y_strd
* Output Y stride
*
* @param[in] dst_uv_strd
* Output UV stride
*
* @returns None
*
* @remarks In case there is a need to perform partial frame copy then
* by passing appropriate source and destination pointers and appropriate
* values for wd and ht it can be done
*
*******************************************************************************
*/
380
/*
 * Copies a 4:2:0 semi-planar picture row by row: ht luma rows of wd bytes,
 * followed by (ht >> 1) interleaved chroma rows of wd bytes. Source and
 * destination may use different strides; chroma byte order is not changed.
 */
void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
                                    UWORD8 *pu1_uv_src,
                                    UWORD8 *pu1_y_dst,
                                    UWORD8 *pu1_uv_dst,
                                    WORD32 wd,
                                    WORD32 ht,
                                    WORD32 src_y_strd,
                                    WORD32 src_uv_strd,
                                    WORD32 dst_y_strd,
                                    WORD32 dst_uv_strd)
{
    UWORD8 *pu1_rd;
    UWORD8 *pu1_wr;
    WORD32 rows_left;

    /* Luma plane */
    pu1_rd = pu1_y_src;
    pu1_wr = pu1_y_dst;
    for(rows_left = ht; rows_left > 0; rows_left--)
    {
        memcpy(pu1_wr, pu1_rd, wd);
        pu1_wr += dst_y_strd;
        pu1_rd += src_y_strd;
    }

    /* Interleaved chroma plane: half the rows, same number of bytes per row */
    pu1_rd = pu1_uv_src;
    pu1_wr = pu1_uv_dst;
    for(rows_left = ht >> 1; rows_left > 0; rows_left--)
    {
        memcpy(pu1_wr, pu1_rd, wd);
        pu1_wr += dst_uv_strd;
        pu1_rd += src_uv_strd;
    }
}
431
432
433
/**
*******************************************************************************
*
* @brief Function used for copying a 420SP buffer with U and V swapped
*
* @par Description
* Function used for copying a 420SP buffer. Luma is copied as is, while the
* two bytes of every interleaved chroma pair are swapped in the output
*
* @param[in] pu1_y_src
* Input Y pointer
*
* @param[in] pu1_uv_src
* Input UV pointer (UV is interleaved either in UV or VU format)
*
* @param[out] pu1_y_dst
* Output Y pointer
*
* @param[out] pu1_uv_dst
* Output UV pointer (UV is interleaved in the opposite order to that of input)
*
* @param[in] wd
* Width
*
* @param[in] ht
* Height
*
* @param[in] src_y_strd
* Input Y Stride
*
* @param[in] src_uv_strd
* Input UV stride
*
* @param[in] dst_y_strd
* Output Y stride
*
* @param[in] dst_uv_strd
* Output UV stride
*
* @returns None
*
* @remarks In case there is a need to perform partial frame copy then
* by passing appropriate source and destination pointers and appropriate
* values for wd and ht it can be done
*
*******************************************************************************
*/
/*
 * Copies a 4:2:0 semi-planar picture while exchanging the two bytes of every
 * interleaved chroma pair (UV <-> VU). Luma is copied unchanged: ht rows of
 * wd bytes. Chroma covers (ht >> 1) rows, processed two bytes at a time for
 * byte offsets 0, 2, 4, ... below wd.
 */
void ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
                                            UWORD8 *pu1_uv_src,
                                            UWORD8 *pu1_y_dst,
                                            UWORD8 *pu1_uv_dst,
                                            WORD32 wd,
                                            WORD32 ht,
                                            WORD32 src_y_strd,
                                            WORD32 src_uv_strd,
                                            WORD32 dst_y_strd,
                                            WORD32 dst_uv_strd)
{
    UWORD8 *pu1_rd;
    UWORD8 *pu1_wr;
    WORD32 rows_left;
    WORD32 col;

    /* Luma plane: straight row copy */
    pu1_rd = pu1_y_src;
    pu1_wr = pu1_y_dst;
    for(rows_left = ht; rows_left > 0; rows_left--)
    {
        memcpy(pu1_wr, pu1_rd, wd);
        pu1_wr += dst_y_strd;
        pu1_rd += src_y_strd;
    }

    /* Chroma plane: swap the bytes of each pair while copying */
    pu1_rd = pu1_uv_src;
    pu1_wr = pu1_uv_dst;
    for(rows_left = ht >> 1; rows_left > 0; rows_left--)
    {
        col = 0;
        while(col < wd)
        {
            /* Second source byte goes first, first source byte goes second */
            pu1_wr[col] = pu1_rd[col + 1];
            pu1_wr[col + 1] = pu1_rd[col];
            col += 2;
        }
        pu1_wr += dst_uv_strd;
        pu1_rd += src_uv_strd;
    }
}
/**
*******************************************************************************
*
* @brief Function used for converting a 420SP buffer to 420P
*
* @par Description
* Function used for converting a 420SP buffer to 420P. Luma is copied
* (unless disabled) and the interleaved chroma is de-interleaved into
* separate U and V planes
*
* @param[in] pu1_y_src
* Input Y pointer
*
* @param[in] pu1_uv_src
* Input UV pointer (UV is interleaved either in UV or VU format)
*
* @param[out] pu1_y_dst
* Output Y pointer
*
* @param[out] pu1_u_dst
* Output U pointer
*
* @param[out] pu1_v_dst
* Output V pointer
*
* @param[in] wd
* Width
*
* @param[in] ht
* Height
*
* @param[in] src_y_strd
* Input Y Stride
*
* @param[in] src_uv_strd
* Input UV stride
*
* @param[in] dst_y_strd
* Output Y stride
*
* @param[in] dst_uv_strd
* Output UV stride
*
* @param[in] is_u_first
* Flag to indicate if U is the first byte in input chroma part
*
* @param[in] disable_luma_copy
* Flag to skip the luma copy; when non-zero only chroma is written
*
* @returns none
*
* @remarks In case there is a need to perform partial frame copy then
* by passing appropriate source and destination pointers and appropriate
* values for wd and ht it can be done
*
*******************************************************************************
*/
587
588
/*
 * Converts a 4:2:0 semi-planar picture to planar 4:2:0.
 *
 * Unless disable_luma_copy is non-zero, ht luma rows of wd bytes are copied
 * to pu1_y_dst. The interleaved chroma plane is then split into separate U
 * and V planes of (wd >> 1) x (ht >> 1) samples; both output planes use
 * dst_uv_strd. is_u_first selects whether U (non-zero) or V (zero) is the
 * first byte of each input chroma pair.
 */
void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
                                   UWORD8 *pu1_uv_src,
                                   UWORD8 *pu1_y_dst,
                                   UWORD8 *pu1_u_dst,
                                   UWORD8 *pu1_v_dst,
                                   WORD32 wd,
                                   WORD32 ht,
                                   WORD32 src_y_strd,
                                   WORD32 src_uv_strd,
                                   WORD32 dst_y_strd,
                                   WORD32 dst_uv_strd,
                                   WORD32 is_u_first,
                                   WORD32 disable_luma_copy)
{
    WORD32 u_off, v_off;
    WORD32 chroma_rows, chroma_cols;
    WORD32 row, col;

    if(!disable_luma_copy)
    {
        /* Luma plane: straight row copy */
        UWORD8 *pu1_rd = pu1_y_src;
        UWORD8 *pu1_wr = pu1_y_dst;

        for(row = ht; row > 0; row--)
        {
            memcpy(pu1_wr, pu1_rd, wd);
            pu1_wr += dst_y_strd;
            pu1_rd += src_y_strd;
        }
    }

    /* Byte position of U and V within each interleaved pair */
    u_off = is_u_first ? 0 : 1;
    v_off = is_u_first ? 1 : 0;

    chroma_rows = ht >> 1;
    chroma_cols = wd >> 1;

    /* De-interleave: one U and one V sample per input pair */
    for(row = 0; row < chroma_rows; row++)
    {
        for(col = 0; col < chroma_cols; col++)
        {
            pu1_u_dst[col] = pu1_uv_src[2 * col + u_off];
            pu1_v_dst[col] = pu1_uv_src[2 * col + v_off];
        }

        pu1_u_dst += dst_uv_strd;
        pu1_v_dst += dst_uv_strd;
        pu1_uv_src += src_uv_strd;
    }
}
661
662
663
/**
*******************************************************************************
*
* @brief Function used for format conversion or frame copy
*
* @par Description
* Function used for copying or converting rows of the display picture to the
* output buffer. When more than one core is used and the picture being
* converted is the one currently being reconstructed, the function waits
* until the CTB rows it needs have been processed
*
* @param[in] ps_codec
* Pointer to codec context
*
* @param[in] ps_proc
* Pointer to process context
*
* @param[out] pu1_y_dst
* Output Y pointer (also used as the RGB output pointer for RGB formats)
*
* @param[out] pu1_u_dst
* Output U/UV pointer ( UV is interleaved in the same format as that of input)
*
* @param[out] pu1_v_dst
* Output V pointer ( used in 420P output case)
*
* @param[in] cur_row
* Row from which conversion/copy starts
*
* @param[in] num_rows
* Number of rows to be converted/copied; nothing is done when this is zero
*
* @returns Error from IHEVCD_ERROR_T
*
*******************************************************************************
*/
ihevcd_fmt_conv(codec_t * ps_codec,process_ctxt_t * ps_proc,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,WORD32 cur_row,WORD32 num_rows)692 IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec,
693 process_ctxt_t *ps_proc,
694 UWORD8 *pu1_y_dst,
695 UWORD8 *pu1_u_dst,
696 UWORD8 *pu1_v_dst,
697 WORD32 cur_row,
698 WORD32 num_rows)
699 {
700 IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
701 pic_buf_t *ps_disp_pic;
702 UWORD8 *pu1_y_src, *pu1_uv_src;
703 UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
704 UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
705 UWORD16 *pu2_rgb_dst_tmp;
706 UWORD32 *pu4_rgb_dst_tmp;
707 WORD32 is_u_first;
708 UWORD8 *pu1_luma;
709 UWORD8 *pu1_chroma;
710 sps_t *ps_sps;
711 WORD32 disable_luma_copy;
712 WORD32 crop_unit_x, crop_unit_y;
713
714 if(0 == num_rows)
715 return ret;
716
717 /* In case processing is disabled, then no need to format convert/copy */
718 PROFILE_DISABLE_FMT_CONV();
719 ps_sps = ps_proc->ps_sps;
720
721 crop_unit_x = 1;
722 crop_unit_y = 1;
723
724 if(CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc)
725 {
726 crop_unit_x = 2;
727 crop_unit_y = 2;
728 }
729
730 ps_disp_pic = ps_codec->ps_disp_buf;
731 pu1_luma = ps_disp_pic->pu1_luma;
732 pu1_chroma = ps_disp_pic->pu1_chroma;
733
734
735 /* Take care of cropping */
736 pu1_luma += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x;
737
738 /* Left offset is multiplied by 2 because buffer is UV interleaved */
739 pu1_chroma += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2;
740
741
742 is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 1 : 0;
743
744 /* In case of 420P output luma copy is disabled for shared mode */
745 disable_luma_copy = 0;
746 if(1 == ps_codec->i4_share_disp_buf)
747 {
748 disable_luma_copy = 1;
749 }
750
751
752
753 {
754 pu1_y_src = pu1_luma + cur_row * ps_codec->i4_strd;
755 pu1_uv_src = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd;
756
757 /* In case of shared mode, with 420P output, get chroma destination */
758 if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt))
759 {
760 WORD32 i;
761 for(i = 0; i < ps_codec->i4_share_disp_buf_cnt; i++)
762 {
763 WORD32 diff = ps_disp_pic->pu1_luma - ps_codec->s_disp_buffer[i].pu1_bufs[0];
764 if(diff == (ps_codec->i4_strd * PAD_TOP + PAD_LEFT))
765 {
766 pu1_u_dst = ps_codec->s_disp_buffer[i].pu1_bufs[1];
767 pu1_u_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
768
769 pu1_v_dst = ps_codec->s_disp_buffer[i].pu1_bufs[2];
770 pu1_v_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
771 break;
772 }
773 }
774 }
775 pu2_rgb_dst_tmp = (UWORD16 *)pu1_y_dst;
776 pu2_rgb_dst_tmp += cur_row * ps_codec->i4_disp_strd;
777 pu4_rgb_dst_tmp = (UWORD32 *)pu1_y_dst;
778 pu4_rgb_dst_tmp += cur_row * ps_codec->i4_disp_strd;
779 pu1_y_dst_tmp = pu1_y_dst + cur_row * ps_codec->i4_disp_strd;
780 pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd;
781 pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
782 pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
783
784 /* In case of multi threaded implementation, format conversion might be called
785 * before reconstruction is completed. If the frame being converted/copied
786 * is same as the frame being reconstructed,
787 * Check how many rows can be format converted
788 * Convert those many rows and then check for remaining rows and so on
789 */
790
791 if((0 == ps_codec->i4_flush_mode) && (ps_codec->i4_disp_buf_id == ps_proc->i4_cur_pic_buf_id) && (1 < ps_codec->i4_num_cores))
792 {
793 WORD32 idx;
794 UWORD8 *pu1_buf;
795 WORD32 status;
796 WORD32 last_row = cur_row + num_rows;
797 WORD32 last_ctb_y;
798 UWORD32 ctb_in_row;
799
800 while(1)
801 {
802 last_row = cur_row + MAX(num_rows, (1 << ps_sps->i1_log2_ctb_size)) +
803 ps_sps->i2_pic_crop_top_offset * crop_unit_y;
804 last_ctb_y = (last_row >> ps_sps->i1_log2_ctb_size) - 1;
805 /* Since deblocking works with a shift of -4, -4 ,wait till next CTB row is processed */
806 last_ctb_y++;
807 /* In case of a conformance window, an extra wait of one row might be needed */
808 last_ctb_y++;
809 last_ctb_y = MIN(last_ctb_y, (ps_sps->i2_pic_ht_in_ctb - 1));
810
811 idx = (last_ctb_y * ps_sps->i2_pic_wd_in_ctb);
812
813 /*Check if the row below is completely processed before proceeding with format conversion*/
814 status = 1;
815 for(ctb_in_row = 0; (WORD32)ctb_in_row < ps_sps->i2_pic_wd_in_ctb; ctb_in_row++)
816 {
817 pu1_buf = (ps_codec->pu1_proc_map + idx + ctb_in_row);
818 status &= *pu1_buf;
819 }
820
821 if(status)
822 {
823 break;
824 }
825 else
826 {
827 ithread_yield();
828 }
829 }
830 }
831
832
833 if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt))
834 {
835 ihevcd_fmt_conv_420sp_to_420sp_ft *fmt_conv_fptr;
836 if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
837 {
838 fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr;
839 }
840 else
841 {
842 fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420sp;
843 }
844 fmt_conv_fptr(pu1_y_src, pu1_uv_src,
845 pu1_y_dst_tmp, pu1_uv_dst_tmp,
846 ps_codec->i4_disp_wd,
847 num_rows,
848 ps_codec->i4_strd,
849 ps_codec->i4_strd,
850 ps_codec->i4_disp_strd,
851 ps_codec->i4_disp_strd);
852 }
853 else if(IV_YUV_420P == ps_codec->e_chroma_fmt)
854 {
855 ihevcd_fmt_conv_420sp_to_420p_ft *fmt_conv_fptr;
856 if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
857 {
858 fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr;
859 }
860 else
861 {
862 fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420p;
863 }
864
865 if(0 == disable_luma_copy)
866 {
867 // copy luma
868 WORD32 i;
869 WORD32 num_cols = ps_codec->i4_disp_wd;
870
871 for(i = 0; i < num_rows; i++)
872 {
873 memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols);
874 pu1_y_dst_tmp += ps_codec->i4_disp_strd;
875 pu1_y_src += ps_codec->i4_strd;
876 }
877
878 disable_luma_copy = 1;
879 }
880 fmt_conv_fptr(pu1_y_src, pu1_uv_src,
881 pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp,
882 ps_codec->i4_disp_wd,
883 num_rows,
884 ps_codec->i4_strd,
885 ps_codec->i4_strd,
886 ps_codec->i4_disp_strd,
887 (ps_codec->i4_disp_strd / 2),
888 is_u_first,
889 disable_luma_copy);
890 }
891 else if(IV_RGB_565 == ps_codec->e_chroma_fmt)
892 {
893 ihevcd_fmt_conv_420sp_to_rgb565_ft *fmt_conv_fptr;
894 if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
895 {
896 fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr;
897 }
898 else
899 {
900 fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgb565;
901 }
902
903 fmt_conv_fptr(pu1_y_src, pu1_uv_src,
904 pu2_rgb_dst_tmp,
905 ps_codec->i4_disp_wd,
906 num_rows,
907 ps_codec->i4_strd,
908 ps_codec->i4_strd,
909 ps_codec->i4_disp_strd,
910 is_u_first);
911 }
912 else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt)
913 {
914 ihevcd_fmt_conv_420sp_to_rgba8888_ft *fmt_conv_fptr;
915 if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
916 {
917 fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr;
918 }
919 else
920 {
921 fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgba8888;
922 }
923
924 ASSERT(is_u_first == 1);
925 fmt_conv_fptr(pu1_y_src,
926 pu1_uv_src,
927 pu4_rgb_dst_tmp,
928 ps_codec->i4_disp_wd,
929 num_rows,
930 ps_codec->i4_strd,
931 ps_codec->i4_strd,
932 ps_codec->i4_disp_strd,
933 is_u_first);
934 }
935
936
937
938 }
939 return (ret);
940 }
941
942