1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_process.c
25 *
26 * @brief
27 * Contains functions for codec thread
28 *
29 * @author
30 * Harish
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps()
34 * - ih264e_init_entropy_ctxt()
35 * - ih264e_entropy()
36 * - ih264e_pack_header_data()
37 * - ih264e_update_proc_ctxt()
38 * - ih264e_init_proc_ctxt()
39 * - ih264e_pad_recon_buffer()
40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
41 * - ih264e_process()
42 * - ih264e_set_rc_pic_params()
43 * - ih264e_update_rc_post_enc()
44 * - ih264e_process_thread()
45 *
46 * @remarks
47 * None
48 *
49 *******************************************************************************
50 */
51
52 /*****************************************************************************/
53 /* File Includes */
54 /*****************************************************************************/
55
56 /* System include files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <limits.h>
62 #include <assert.h>
63
64 /* User include files */
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ih264_defs.h"
69 #include "ih264_debug.h"
70 #include "ime_distortion_metrics.h"
71 #include "ime_defs.h"
72 #include "ime_structs.h"
73 #include "ih264_error.h"
74 #include "ih264_structs.h"
75 #include "ih264_trans_quant_itrans_iquant.h"
76 #include "ih264_inter_pred_filters.h"
77 #include "ih264_mem_fns.h"
78 #include "ih264_padding.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_cabac_tables.h"
82 #include "ih264_platform_macros.h"
83 #include "ih264_macros.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264e_error.h"
86 #include "ih264e_bitstream.h"
87 #include "ih264_common_tables.h"
88 #include "ih264_list.h"
89 #include "ih264e_defs.h"
90 #include "irc_cntrl_param.h"
91 #include "irc_frame_info_collector.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_cabac.h"
96 #include "ih264e_process.h"
97 #include "ithread.h"
98 #include "ih264e_intra_modes_eval.h"
99 #include "ih264e_encode_header.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_config.h"
102 #include "ih264e_trace.h"
103 #include "ih264e_statistics.h"
104 #include "ih264_cavlc_tables.h"
105 #include "ih264e_cavlc.h"
106 #include "ih264e_deblk.h"
107 #include "ih264e_me.h"
108 #include "ih264e_debug.h"
109 #include "ih264e_master.h"
110 #include "ih264e_utils.h"
111 #include "irc_mem_req_and_acq.h"
112 #include "irc_rate_control_api.h"
113 #include "ih264e_platform_macros.h"
114 #include "ime_statistics.h"
115
116
117 /*****************************************************************************/
118 /* Function Definitions */
119 /*****************************************************************************/
120
121 /**
122 ******************************************************************************
123 *
124 * @brief This function generates sps, pps set on request
125 *
126 * @par Description
127 * When the encoder is set in header generation mode, the following function
128 * is called. This generates sps and pps headers and returns the control back
129 * to caller.
130 *
131 * @param[in] ps_codec
132 * pointer to codec context
133 *
134 * @return success or failure error code
135 *
136 ******************************************************************************
137 */
ih264e_generate_sps_pps(codec_t * ps_codec)138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139 {
140 /* choose between ping-pong process buffer set */
141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142
143 /* entropy ctxt */
144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145
146 /* Bitstream structure */
147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148
149 /* sps */
150 sps_t *ps_sps = NULL;
151
152 /* pps */
153 pps_t *ps_pps = NULL;
154
155 /* output buff */
156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157
158
159 /********************************************************************/
160 /* initialize the bit stream buffer */
161 /********************************************************************/
162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163
164 /********************************************************************/
165 /* BEGIN HEADER GENERATION */
166 /********************************************************************/
167 /*ps_codec->i4_pps_id ++;*/
168 ps_codec->i4_pps_id %= MAX_PPS_CNT;
169
170 /*ps_codec->i4_sps_id ++;*/
171 ps_codec->i4_sps_id %= MAX_SPS_CNT;
172
173 /* populate sps header */
174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175 ih264e_populate_sps(ps_codec, ps_sps);
176
177 /* populate pps header */
178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179 ih264e_populate_pps(ps_codec, ps_pps);
180
181 ps_entropy->i4_error_code = IH264E_SUCCESS;
182
183 /* generate sps */
184 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
185 &ps_codec->s_cfg.s_vui);
186
187 /* generate pps */
188 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
189
190 /* queue output buffer */
191 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
192
193 return ps_entropy->i4_error_code;
194 }
195
196 /**
197 *******************************************************************************
198 *
199 * @brief initialize entropy context.
200 *
201 * @par Description:
202 * Before invoking the call to perform entropy coding, the entropy context
203 * associated with the job needs to be initialized. This involves the start
204 * mb address, end mb address, slice index and the pointer to location at
205 * which the mb residue info and mb header info are packed.
206 *
207 * @param[in] ps_proc
208 * Pointer to the current process context
209 *
210 * @returns error status
211 *
212 * @remarks none
213 *
214 *******************************************************************************
215 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)216 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
217 {
218 /* codec context */
219 codec_t *ps_codec = ps_proc->ps_codec;
220
221 /* entropy ctxt */
222 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
223
224 /* start address */
225 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
226
227 /* end address */
228 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
229
230 /* slice index */
231 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
232
233 /* sof */
234 /* @ start of frame or start of a new slice, set sof flag */
235 if (ps_entropy->i4_mb_start_add == 0)
236 {
237 ps_entropy->i4_sof = 1;
238 }
239
240 if (ps_entropy->i4_mb_x == 0)
241 {
242 /* packed mb coeff data */
243 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
244 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
245
246 /* packed mb header data */
247 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
248 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
249 }
250
251 return IH264E_SUCCESS;
252 }
253
254 /**
255 *******************************************************************************
256 *
257 * @brief entry point for entropy coding
258 *
259 * @par Description
260 * This function calls lower level functions to perform entropy coding for a
261 * group (n rows) of mb's. After encoding 1 row of mb's, the function takes
262 * back the control, updates the ctxt and calls lower level functions again.
263 * This process is repeated till all the rows or group of mb's (which ever is
264 * minimum) are coded
265 *
266 * @param[in] ps_proc
267 * process context
268 *
269 * @returns error status
270 *
271 * @remarks
272 *
273 *******************************************************************************
274 */
275
ih264e_entropy(process_ctxt_t * ps_proc)276 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
277 {
278 /* codec context */
279 codec_t *ps_codec = ps_proc->ps_codec;
280
281 /* entropy context */
282 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
283
284 /* cabac context */
285 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
286
287 /* sps */
288 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
289
290 /* pps */
291 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
292
293 /* slice header */
294 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
295
296 /* slice type */
297 WORD32 i4_slice_type = ps_proc->i4_slice_type;
298
299 /* Bitstream structure */
300 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
301
302 /* output buff */
303 out_buf_t s_out_buf;
304
305 /* proc map */
306 UWORD8 *pu1_proc_map;
307
308 /* entropy map */
309 UWORD8 *pu1_entropy_map_curr;
310
311 /* proc base idx */
312 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
313
314 /* temp var */
315 WORD32 i4_wd_mbs, i4_ht_mbs;
316 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
317 WORD32 bitstream_start_offset, bitstream_end_offset;
318 /********************************************************************/
319 /* BEGIN INIT */
320 /********************************************************************/
321
322 /* entropy encode start address */
323 u4_mb_idx = ps_entropy->i4_mb_start_add;
324
325 /* entropy encode end address */
326 u4_mb_end_idx = ps_entropy->i4_mb_end_add;
327
328 /* width in mbs */
329 i4_wd_mbs = ps_entropy->i4_wd_mbs;
330
331 /* height in mbs */
332 i4_ht_mbs = ps_entropy->i4_ht_mbs;
333
334 /* total mb cnt */
335 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
336
337 /* proc map */
338 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
339
340 /* entropy map */
341 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
342
343 /********************************************************************/
344 /* @ start of frame / slice, */
345 /* initialize the output buffer, */
346 /* initialize the bit stream buffer, */
347 /* check if sps and pps headers have to be generated, */
348 /* populate and generate slice header */
349 /********************************************************************/
350 if (ps_entropy->i4_sof)
351 {
352 /********************************************************************/
353 /* initialize the output buffer */
354 /********************************************************************/
355 s_out_buf = ps_codec->as_out_buf[ctxt_sel];
356
357 /* is last frame to encode */
358 s_out_buf.u4_is_last = ps_entropy->u4_is_last;
359
360 /* frame idx */
361 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
362 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
363
364 /********************************************************************/
365 /* initialize the bit stream buffer */
366 /********************************************************************/
367 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
368
369 /********************************************************************/
370 /* BEGIN HEADER GENERATION */
371 /********************************************************************/
372 if (1 == ps_entropy->i4_gen_header)
373 {
374 /* generate sps */
375 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
376 &ps_codec->s_cfg.s_vui);
377 /* generate pps */
378 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
379
380 /* reset i4_gen_header */
381 ps_entropy->i4_gen_header = 0;
382 }
383
384 /* populate slice header */
385 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
386
387 /* generate slice header */
388 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
389 ps_pps, ps_sps);
390
391 /* once start of frame / slice is done, you can reset it */
392 /* it is the responsibility of the caller to set this flag */
393 ps_entropy->i4_sof = 0;
394
395 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
396 {
397 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
398 BITSTREAM_FLUSH(ps_bitstrm);
399 ih264e_init_cabac_ctxt(ps_entropy);
400 }
401 }
402
403 /* begin entropy coding for the mb set */
404 while (u4_mb_idx < u4_mb_end_idx)
405 {
406 /* init ptrs/indices */
407 if (ps_entropy->i4_mb_x == i4_wd_mbs)
408 {
409 ps_entropy->i4_mb_y++;
410 ps_entropy->i4_mb_x = 0;
411
412 /* packed mb coeff data */
413 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
414 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
415
416 /* packed mb header data */
417 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
418 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
419
420 /* proc map */
421 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
422
423 /* entropy map */
424 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
425 }
426
427 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
428 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
429 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
430
431 /* wait until the curr mb is core coded */
432 /* The wait for curr mb to be core coded is essential when entropy is launched
433 * as a separate job
434 */
435 while (1)
436 {
437 volatile UWORD8 *pu1_buf1;
438 WORD32 idx = ps_entropy->i4_mb_x;
439
440 pu1_buf1 = pu1_proc_map + idx;
441 if (*pu1_buf1)
442 break;
443 ithread_yield();
444 }
445
446
447 /* write mb layer */
448 ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
449 /* Starting bitstream offset for header in bits */
450 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
451
452 /* set entropy map */
453 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
454
455 u4_mb_idx++;
456 ps_entropy->i4_mb_x++;
457 /* check for eof */
458 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
459 {
460 if (ps_entropy->i4_mb_x < i4_wd_mbs)
461 {
462 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
463 }
464 }
465
466 if (ps_entropy->i4_mb_x == i4_wd_mbs)
467 {
468 /* if slices are enabled */
469 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
470 {
471 /* current slice index */
472 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
473
474 /* slice map */
475 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
476
477 /* No need to open a slice at end of frame. The current slice can be closed at the time
478 * of signaling eof flag.
479 */
480 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
481 != pu1_slice_idx[u4_mb_idx]))
482 {
483 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
484 { /* mb skip run */
485 if ((i4_slice_type != ISLICE)
486 && *ps_entropy->pi4_mb_skip_run)
487 {
488 if (*ps_entropy->pi4_mb_skip_run)
489 {
490 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
491 *ps_entropy->pi4_mb_skip_run = 0;
492 }
493 }
494 /* put rbsp trailing bits for the previous slice */
495 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
496 }
497 else
498 {
499 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
500 }
501
502 /* update slice header pointer */
503 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
504 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
505 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
506
507 /* populate slice header */
508 ps_entropy->i4_mb_start_add = u4_mb_idx;
509 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
510 ps_sps);
511
512 /* generate slice header */
513 ps_entropy->i4_error_code |= ih264e_generate_slice_header(
514 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
515 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
516 {
517 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
518 BITSTREAM_FLUSH(ps_bitstrm);
519 ih264e_init_cabac_ctxt(ps_entropy);
520 }
521 }
522 else
523 {
524 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
525 && u4_mb_idx != u4_mb_cnt)
526 {
527 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
528 }
529 }
530 }
531 /* Dont execute any further instructions until store synchronization took place */
532 DATA_SYNC();
533 }
534
535 /* Ending bitstream offset for header in bits */
536 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
537 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
538 bitstream_end_offset - bitstream_start_offset;
539 }
540
541 /* check for eof */
542 if (u4_mb_idx == u4_mb_cnt)
543 {
544 /* set end of frame flag */
545 ps_entropy->i4_eof = 1;
546 }
547 else
548 {
549 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
550 && ps_codec->s_cfg.e_slice_mode
551 != IVE_SLICE_MODE_BLOCKS)
552 {
553 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
554 }
555 }
556
557 if (ps_entropy->i4_eof)
558 {
559 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
560 {
561 /* mb skip run */
562 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
563 {
564 if (*ps_entropy->pi4_mb_skip_run)
565 {
566 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
567 ps_entropy->i4_error_code, "mb skip run");
568 *ps_entropy->pi4_mb_skip_run = 0;
569 }
570 }
571 /* put rbsp trailing bits */
572 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
573 }
574 else
575 {
576 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
577 }
578
579 /* update current frame stats to rc library */
580 {
581 /* number of bytes to stuff */
582 WORD32 i4_stuff_bytes;
583
584 /* update */
585 i4_stuff_bytes = ih264e_update_rc_post_enc(
586 ps_codec, ctxt_sel,
587 (ps_proc->ps_codec->i4_poc == 0));
588
589 /* cbr rc - house keeping */
590 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
591 {
592 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
593 }
594 else if (i4_stuff_bytes)
595 {
596 /* add filler nal units */
597 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
598 }
599 }
600
601 /*
602 *Frame number is to be incremented only if the current frame is a
603 * reference frame. After each successful frame encode, we increment
604 * frame number by 1
605 */
606 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
607 && ps_codec->u4_is_curr_frm_ref)
608 {
609 ps_codec->i4_frame_num++;
610 }
611 /********************************************************************/
612 /* signal the output */
613 /********************************************************************/
614 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
615 ps_entropy->ps_bitstrm->u4_strm_buf_offset;
616
617 DEBUG("entropy status %x", ps_entropy->i4_error_code);
618 }
619
620 /* allow threads to dequeue entropy jobs */
621 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
622
623 return ps_entropy->i4_error_code;
624 }
625
626 /**
627 *******************************************************************************
628 *
629 * @brief Packs header information of a mb in to a buffer
630 *
631 * @par Description:
632 * After deciding the mode info of a macroblock, the syntax elements
633 * associated with the mb are packed and stored. The entropy thread unpacks
634 * this buffer and generates the end bit stream.
635 *
636 * @param[in] ps_proc
637 * Pointer to the current process context
638 *
639 * @returns error status
640 *
641 * @remarks none
642 *
643 *******************************************************************************
644 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)645 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
646 {
647 /* curr mb type */
648 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
649
650 /* pack mb syntax layer of curr mb (used for entropy coding) */
651 if (u4_mb_type == I4x4)
652 {
653 /* pointer to mb header storage space */
654 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
655
656 /* temp var */
657 WORD32 i4, byte;
658
659 /* mb type plus mode */
660 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
661
662 /* cbp */
663 *pu1_ptr++ = ps_proc->u4_cbp;
664
665 /* mb qp delta */
666 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
667
668 /* sub mb modes */
669 for (i4 = 0; i4 < 16; i4 ++)
670 {
671 byte = 0;
672
673 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
674 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
675 {
676 byte |= 1;
677 }
678 else
679 {
680
681 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
682 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
683 {
684 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
685 }
686 else
687 {
688 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
689 }
690 }
691
692 i4++;
693
694 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
695 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
696 {
697 byte |= 16;
698 }
699 else
700 {
701
702 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
703 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
704 {
705 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
706 }
707 else
708 {
709 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
710 }
711 }
712
713 *pu1_ptr++ = byte;
714 }
715
716 /* end of mb layer */
717 ps_proc->pv_mb_header_data = pu1_ptr;
718 }
719 else if (u4_mb_type == I16x16)
720 {
721 /* pointer to mb header storage space */
722 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
723
724 /* mb type plus mode */
725 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
726
727 /* cbp */
728 *pu1_ptr++ = ps_proc->u4_cbp;
729
730 /* mb qp delta */
731 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
732
733 /* end of mb layer */
734 ps_proc->pv_mb_header_data = pu1_ptr;
735 }
736 else if (u4_mb_type == P16x16)
737 {
738 /* pointer to mb header storage space */
739 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
740
741 WORD16 *i2_mv_ptr;
742
743 /* mb type plus mode */
744 *pu1_ptr++ = u4_mb_type;
745
746 /* cbp */
747 *pu1_ptr++ = ps_proc->u4_cbp;
748
749 /* mb qp delta */
750 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
751
752 i2_mv_ptr = (WORD16 *)pu1_ptr;
753
754 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
755
756 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
757
758 /* end of mb layer */
759 ps_proc->pv_mb_header_data = i2_mv_ptr;
760 }
761 else if (u4_mb_type == PSKIP)
762 {
763 /* pointer to mb header storage space */
764 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
765
766 /* mb type plus mode */
767 *pu1_ptr++ = u4_mb_type;
768
769 /* end of mb layer */
770 ps_proc->pv_mb_header_data = pu1_ptr;
771 }
772 else if(u4_mb_type == B16x16)
773 {
774
775 /* pointer to mb header storage space */
776 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
777
778 WORD16 *i2_mv_ptr;
779
780 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
781
782 /* mb type plus mode */
783 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
784
785 /* cbp */
786 *pu1_ptr++ = ps_proc->u4_cbp;
787
788 /* mb qp delta */
789 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
790
791 /* l0 & l1 me data */
792 i2_mv_ptr = (WORD16 *)pu1_ptr;
793
794 if (u4_pred_mode != PRED_L1)
795 {
796 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
797 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
798
799 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
800 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
801 }
802 if (u4_pred_mode != PRED_L0)
803 {
804 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
805 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
806
807 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
808 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
809 }
810
811 /* end of mb layer */
812 ps_proc->pv_mb_header_data = i2_mv_ptr;
813
814 }
815 else if(u4_mb_type == BDIRECT)
816 {
817 /* pointer to mb header storage space */
818 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
819
820 /* mb type plus mode */
821 *pu1_ptr++ = u4_mb_type;
822
823 /* cbp */
824 *pu1_ptr++ = ps_proc->u4_cbp;
825
826 /* mb qp delta */
827 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
828
829 ps_proc->pv_mb_header_data = pu1_ptr;
830
831 }
832 else if(u4_mb_type == BSKIP)
833 {
834 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
835
836 /* pointer to mb header storage space */
837 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
838
839 /* mb type plus mode */
840 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
841
842 /* end of mb layer */
843 ps_proc->pv_mb_header_data = pu1_ptr;
844 }
845
846 return IH264E_SUCCESS;
847 }
848
849 /**
850 *******************************************************************************
851 *
852 * @brief update process context after encoding an mb. This involves preserving
853 * the current mb information for later use, initialize the proc ctxt elements to
854 * encode next mb.
855 *
856 * @par Description:
857 * This function performs house keeping tasks after encoding an mb.
858 * After encoding an mb, various elements of the process context needs to be
859 * updated to encode the next mb. For instance, the source, recon and reference
860 * pointers, mb indices have to be adjusted to the next mb. The slice index of
861 * the current mb needs to be updated. If mb qp modulation is enabled, then if
862 * the qp changes the quant param structure needs to be updated. Also to encoding
863 * the next mb, the current mb info is used as part of mode prediction or mv
864 * prediction. Hence the current mb info has to preserved at top/top left/left
865 * locations.
866 *
867 * @param[in] ps_proc
868 * Pointer to the current process context
869 *
870 * @returns none
871 *
872 * @remarks none
873 *
874 *******************************************************************************
875 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)876 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
877 {
878 /* error status */
879 WORD32 error_status = IH264_SUCCESS;
880
881 /* codec context */
882 codec_t *ps_codec = ps_proc->ps_codec;
883
884 /* curr mb indices */
885 WORD32 i4_mb_x = ps_proc->i4_mb_x;
886 WORD32 i4_mb_y = ps_proc->i4_mb_y;
887
888 /* mb syntax elements of neighbors */
889 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele;
890 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
891 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
892
893 /* curr mb type */
894 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
895
896 /* curr mb type */
897 UWORD32 u4_is_intra = ps_proc->u4_is_intra;
898
899 /* width in mbs */
900 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
901
902 /*height in mbs*/
903 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
904
905 /* proc map */
906 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
907
908 /* deblk context */
909 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
910
911 /* deblk bs context */
912 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
913
914 /* top row motion vector info */
915 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
916
917 /* top left mb motion vector */
918 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
919
920 /* left mb motion vector */
921 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
922
923 /* sub mb modes */
924 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
925
926 /*************************************************************/
927 /* During MV prediction, when top right mb is not available, */
928 /* top left mb info. is used for prediction. Hence the curr */
929 /* top, which will be top left for the next mb needs to be */
930 /* preserved before updating it with curr mb info. */
931 /*************************************************************/
932
933 /* mb type, mb class, csbp */
934 *ps_top_left_syn = *ps_top_syn;
935
936 if (ps_proc->i4_slice_type != ISLICE)
937 {
938 /*****************************************/
939 /* update top left with top info results */
940 /*****************************************/
941 /* mv */
942 *ps_top_left_mb_pu = *ps_top_row_pu;
943 }
944
945 /*************************************************/
946 /* update top and left with curr mb info results */
947 /*************************************************/
948
949 /* mb type */
950 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
951
952 /* mb class */
953 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
954
955 /* csbp */
956 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
957
958 /* distortion */
959 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
960
961 if (u4_is_intra)
962 {
963 /* mb / sub mb modes */
964 if (I16x16 == u4_mb_type)
965 {
966 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
967 }
968 else if (I4x4 == u4_mb_type)
969 {
970 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
971 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
972 }
973 else if (I8x8 == u4_mb_type)
974 {
975 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
976 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
977 }
978
979 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
980 {
981 /* mv */
982 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
983 }
984
985 *ps_proc->pu4_mb_pu_cnt = 1;
986 }
987 else
988 {
989 /* mv */
990 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
991 }
992
993 /*
994 * Mark that the MB has been coded intra
995 * So that future AIRs can skip it
996 */
997 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
998
999 /**************************************************/
1000 /* pack mb header info. for entropy coding */
1001 /**************************************************/
1002 ih264e_pack_header_data(ps_proc);
1003
1004 /* update previous mb qp */
1005 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1006
1007 /* store qp */
1008 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1009
1010 /*
1011 * We need to sync the cache to make sure that the nmv content of proc
1012 * is updated to cache properly
1013 */
1014 DATA_SYNC();
1015
1016 /* Just before finishing the row, enqueue the job in to entropy queue.
1017 * The master thread depending on its convenience shall dequeue it and
1018 * performs entropy.
1019 *
1020 * WARN !! Placing this block post proc map update can cause queuing of
1021 * entropy jobs in out of order.
1022 */
1023 if (i4_mb_x == i4_wd_mbs - 1)
1024 {
1025 /* job structures */
1026 job_t s_job;
1027
1028 /* job class */
1029 s_job.i4_cmd = CMD_ENTROPY;
1030
1031 /* number of mbs to be processed in the current job */
1032 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1033
1034 /* job start index x */
1035 s_job.i2_mb_x = 0;
1036
1037 /* job start index y */
1038 s_job.i2_mb_y = ps_proc->i4_mb_y;
1039
1040 /* proc base idx */
1041 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1042
1043 /* queue the job */
1044 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1045
1046 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1047 ih264_list_terminate(ps_codec->pv_entropy_jobq);
1048 }
1049
1050 /* update proc map */
1051 pu1_proc_map[i4_mb_x] = 1;
1052
1053 /**************************************************/
1054 /* update proc ctxt elements for encoding next mb */
1055 /**************************************************/
1056 /* update indices */
1057 i4_mb_x ++;
1058 ps_proc->i4_mb_x = i4_mb_x;
1059
1060 if (ps_proc->i4_mb_x == i4_wd_mbs)
1061 {
1062 ps_proc->i4_mb_y++;
1063 ps_proc->i4_mb_x = 0;
1064 }
1065
1066 /* update slice index */
1067 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1068
1069 /* update buffers pointers */
1070 ps_proc->pu1_src_buf_luma += MB_SIZE;
1071 ps_proc->pu1_rec_buf_luma += MB_SIZE;
1072 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1073 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1074
1075 /*
1076 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1077 * the stride per MB is MB_SIZE
1078 */
1079 ps_proc->pu1_src_buf_chroma += MB_SIZE;
1080 ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1081 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1082 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1083
1084
1085
1086 /* Reset cost, distortion params */
1087 ps_proc->i4_mb_cost = INT_MAX;
1088 ps_proc->i4_mb_distortion = SHRT_MAX;
1089
1090 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1091
1092 ps_proc->pu4_mb_pu_cnt += 1;
1093
1094 /* Update colocated pu */
1095 if (ps_proc->i4_slice_type == BSLICE)
1096 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1097
1098 /* deblk ctxts */
1099 if (ps_proc->u4_disable_deblock_level != 1)
1100 {
1101 /* indices */
1102 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1103 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1104
1105 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1106 ps_deblk->i4_mb_x ++;
1107
1108 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1109 /*
1110 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1111 * the stride per MB is MB_SIZE
1112 */
1113 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1114 #endif
1115 }
1116
1117 return error_status;
1118 }
1119
1120 /**
1121 *******************************************************************************
1122 *
1123 * @brief initialize process context.
1124 *
1125 * @par Description:
1126 * Before dispatching the current job to process thread, the process context
1127 * associated with the job is initialized. Usually every job aims to encode one
1128 * row of mb's. Basing on the row indices provided by the job, the process
1129 * context's buffer ptrs, slice indices and other elements that are necessary
1130 * during core-coding are initialized.
1131 *
1132 * @param[in] ps_proc
1133 * Pointer to the current process context
1134 *
1135 * @returns error status
1136 *
1137 * @remarks none
1138 *
1139 *******************************************************************************
1140 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1141 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1142 {
1143 /* codec context */
1144 codec_t *ps_codec = ps_proc->ps_codec;
1145
1146 /* nmb processing context*/
1147 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1148
1149 /* indices */
1150 WORD32 i4_mb_x, i4_mb_y;
1151
1152 /* strides */
1153 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1154 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1155 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1156
1157 /* quant params */
1158 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1159
1160 /* deblk ctxt */
1161 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1162
1163 /* deblk bs context */
1164 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1165
1166 /* Pointer to mv_buffer of current frame */
1167 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1168
1169 /* Pointers for color space conversion */
1170 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1171
1172 /* Pad the MB to support non standard sizes */
1173 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1174 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1175 UWORD16 u2_num_rows = MB_SIZE;
1176 WORD32 convert_uv_only;
1177
1178 /********************************************************************/
1179 /* BEGIN INIT */
1180 /********************************************************************/
1181
1182 i4_mb_x = ps_proc->i4_mb_x;
1183 i4_mb_y = ps_proc->i4_mb_y;
1184
1185 /* Number of mbs processed in one loop of process function */
1186 ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1187 ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1188
1189 /* init buffer pointers */
1190 convert_uv_only = 1;
1191 if (u4_pad_bottom_sz || u4_pad_right_sz ||
1192 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1193 {
1194 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1195 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1196 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1197 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1198 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1199 convert_uv_only = 0;
1200 }
1201 else
1202 {
1203 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1204 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1205 }
1206
1207
1208 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1209 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1210 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1211 u4_pad_bottom_sz || u4_pad_right_sz)
1212 {
1213 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1214 (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1215 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1216
1217 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1218 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1219 }
1220 else
1221 {
1222 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1223 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1224 }
1225
1226 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1227 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1228
1229 /* Tempral back and forward reference buffer */
1230 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1231 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1232 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1233 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1234
1235 /*
1236 * Do color space conversion
1237 * NOTE : We assume there that the number of MB's to process will not span multiple rows
1238 */
1239 switch (ps_codec->s_cfg.e_inp_color_fmt)
1240 {
1241 case IV_YUV_420SP_UV:
1242 case IV_YUV_420SP_VU:
1243 /* In case of 420 semi-planar input, copy last few rows to intermediate
1244 buffer as chroma trans functions access one extra byte due to interleaved input.
1245 This data will be padded if required */
1246 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1247 {
1248 WORD32 num_rows = MB_SIZE;
1249 UWORD8 *pu1_src;
1250 UWORD8 *pu1_dst;
1251 WORD32 i;
1252 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1253 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1254
1255 pu1_dst = ps_proc->pu1_src_buf_luma;
1256
1257 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
1258 if (u4_pad_bottom_sz || u4_pad_right_sz) {
1259 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1260 num_rows = MB_SIZE - u4_pad_bottom_sz;
1261 for (i = 0; i < num_rows; i++)
1262 {
1263 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1264 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1265 pu1_dst += ps_proc->i4_src_strd;
1266 }
1267 }
1268 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1269 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1270 pu1_dst = ps_proc->pu1_src_buf_chroma;
1271
1272 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1273 * due to interleaved input
1274 */
1275 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1276 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1277 else
1278 num_rows = BLK8x8SIZE;
1279 for (i = 0; i < num_rows; i++)
1280 {
1281 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1282 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1283 pu1_dst += ps_proc->i4_src_chroma_strd;
1284 }
1285
1286 }
1287 break;
1288
1289 case IV_YUV_420P :
1290 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1291 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1292
1293 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1294 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1295
1296 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1297 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1298
1299 ps_codec->pf_ih264e_conv_420p_to_420sp(
1300 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1301 ps_proc->pu1_src_buf_luma,
1302 ps_proc->pu1_src_buf_chroma, u2_num_rows,
1303 ps_codec->s_cfg.u4_disp_wd,
1304 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1305 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1306 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1307 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1308 convert_uv_only);
1309 break;
1310
1311 case IV_YUV_422ILE :
1312 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1313 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1314
1315 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1316 ps_proc->pu1_src_buf_luma,
1317 ps_proc->pu1_src_buf_chroma,
1318 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1319 ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1320 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1321 ps_proc->i4_src_chroma_strd,
1322 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1323 break;
1324
1325 default:
1326 break;
1327 }
1328
1329 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1330 {
1331 UWORD32 u4_pad_wd, u4_pad_ht;
1332 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1333 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1334 u4_pad_ht = MB_SIZE;
1335 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1336 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1337
1338 ih264_pad_right_luma(
1339 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1340 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1341
1342 ih264_pad_right_chroma(
1343 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1344 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1345 }
1346
1347 /* pad bottom edge */
1348 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1349 {
1350 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1351 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1352
1353 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1354 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1355 }
1356
1357
1358 /* packed mb coeff data */
1359 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1360
1361 /* packed mb header data */
1362 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1363
1364 /* slice index */
1365 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1366
1367 /*********************************************************************/
1368 /* ih264e_init_quant_params() routine is called at the pic init level*/
1369 /* this would have initialized the qp. */
1370 /* TODO_LATER: currently it is assumed that quant params donot change*/
1371 /* across mb's. When they do calculate update ps_qp_params accordingly*/
1372 /*********************************************************************/
1373
1374 /* init mv buffer ptr */
1375 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1376 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1377
1378 /* Init co-located mv buffer */
1379 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1380 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1381
1382 if (i4_mb_y == 0)
1383 {
1384 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1385 }
1386 else
1387 {
1388 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1389 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1390 }
1391
1392 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1393
1394 /* mb type */
1395 ps_proc->u4_mb_type = I16x16;
1396
1397 /* lambda */
1398 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1399
1400 /* mb distortion */
1401 ps_proc->i4_mb_distortion = SHRT_MAX;
1402
1403 if (i4_mb_x == 0)
1404 {
1405 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1406
1407 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1408
1409 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1410
1411 if (i4_mb_y == 0)
1412 {
1413 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1414 }
1415 }
1416
1417 /* mb cost */
1418 ps_proc->i4_mb_cost = INT_MAX;
1419
1420 /**********************/
1421 /* init deblk context */
1422 /**********************/
1423 ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1424 /* deblk lags the current mb proc by 1 row */
1425 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1426 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1427 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1428 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1429
1430 /* buffer ptrs */
1431 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1432 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1433
1434 /* init deblk bs context */
1435 /* mb indices */
1436 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1437 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1438
1439 /* init n_mb_process context */
1440 ps_n_mb_ctxt->i4_mb_x = 0;
1441 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1442 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1443
1444 return IH264E_SUCCESS;
1445 }
1446
1447 /**
1448 *******************************************************************************
1449 *
1450 * @brief This function performs luma & chroma padding
1451 *
1452 * @par Description:
1453 *
1454 * @param[in] ps_proc
1455 * Process context corresponding to the job
1456 *
1457 * @param[in] pu1_curr_pic_luma
1458 * Pointer to luma buffer
1459 *
1460 * @param[in] pu1_curr_pic_chroma
1461 * Pointer to chroma buffer
1462 *
1463 * @param[in] i4_mb_x
1464 * mb index x
1465 *
1466 * @param[in] i4_mb_y
1467 * mb index y
1468 *
1469 * @param[in] i4_pad_ht
1470 * number of rows to be padded
1471 *
1472 * @returns error status
1473 *
1474 * @remarks none
1475 *
1476 *******************************************************************************
1477 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1478 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1479 UWORD8 *pu1_curr_pic_luma,
1480 UWORD8 *pu1_curr_pic_chroma,
1481 WORD32 i4_mb_x,
1482 WORD32 i4_mb_y,
1483 WORD32 i4_pad_ht)
1484 {
1485 /* codec context */
1486 codec_t *ps_codec = ps_proc->ps_codec;
1487
1488 /* strides */
1489 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1490
1491 if (i4_mb_x == 0)
1492 {
1493 /* padding left luma */
1494 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1495
1496 /* padding left chroma */
1497 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1498 }
1499 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1500 {
1501 /* padding right luma */
1502 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1503
1504 /* padding right chroma */
1505 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1506
1507 if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1508 {
1509 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1510 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1511
1512 /* padding bottom luma */
1513 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1514
1515 /* padding bottom chroma */
1516 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1517 }
1518 }
1519
1520 if (i4_mb_y == 0)
1521 {
1522 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1523 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1524 WORD32 wd = MB_SIZE;
1525
1526 if (i4_mb_x == 0)
1527 {
1528 pu1_rec_luma -= PAD_LEFT;
1529 pu1_rec_chroma -= PAD_LEFT;
1530
1531 wd += PAD_LEFT;
1532 }
1533 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1534 {
1535 wd += PAD_RIGHT;
1536 }
1537
1538 /* padding top luma */
1539 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1540
1541 /* padding top chroma */
1542 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1543 }
1544
1545 return IH264E_SUCCESS;
1546 }
1547
1548
1549
1550
1551 /**
1552 *******************************************************************************
1553 *
1554 * @brief This function performs deblocking, padding and halfpel generation for
1555 * 'n' MBs
1556 *
1557 * @par Description:
1558 *
1559 * @param[in] ps_proc
1560 * Process context corresponding to the job
1561 *
1562 * @param[in] pu1_curr_pic_luma
1563 * Current MB being processed(Luma)
1564 *
1565 * @param[in] pu1_curr_pic_chroma
1566 * Current MB being processed(Chroma)
1567 *
1568 * @param[in] i4_mb_x
1569 * Column value of current MB processed
1570 *
1571 * @param[in] i4_mb_y
1572 * Curent row processed
1573 *
1574 * @returns error status
1575 *
1576 * @remarks none
1577 *
1578 *******************************************************************************
1579 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1580 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1581 UWORD8 *pu1_curr_pic_luma,
1582 UWORD8 *pu1_curr_pic_chroma,
1583 WORD32 i4_mb_x,
1584 WORD32 i4_mb_y)
1585 {
1586 /* codec context */
1587 codec_t *ps_codec = ps_proc->ps_codec;
1588
1589 /* n_mb processing context */
1590 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1591
1592 /* deblk context */
1593 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1594
1595 /* strides */
1596 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1597
1598 /* loop variables */
1599 WORD32 row, i, j, col;
1600
1601 /* Padding Width */
1602 UWORD32 u4_pad_wd;
1603
1604 /* deblk_map of the row being deblocked */
1605 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1606
1607 /* deblk_map_previous row */
1608 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1609
1610 WORD32 u4_pad_top = 0;
1611
1612 WORD32 u4_deblk_prev_row = 0;
1613
1614 /* Number of mbs to be processed */
1615 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1616
1617 /* Number of mbs actually processed
1618 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1619 WORD32 i4_n_mb_process_count = 0;
1620
1621 UWORD8 *pu1_pad_bottom_src = NULL;
1622
1623 UWORD8 *pu1_pad_src_luma = NULL;
1624 UWORD8 *pu1_pad_src_chroma = NULL;
1625
1626 if (ps_proc->u4_disable_deblock_level == 1)
1627 {
1628 /* If left most MB is processed, then pad left */
1629 if (i4_mb_x == 0)
1630 {
1631 /* padding left luma */
1632 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1633
1634 /* padding left chroma */
1635 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1636 }
1637 /*last col*/
1638 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1639 {
1640 /* padding right luma */
1641 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1642
1643 /* padding right chroma */
1644 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1645 }
1646 }
1647
1648 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1649 {
1650 /* if number of mb's to be processed are less than 'N', go back.
1651 * exception to the above clause is end of row */
1652 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1653 {
1654 return IH264E_SUCCESS;
1655 }
1656 else
1657 {
1658 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1659
1660 /* performing deblocking for required number of MBs */
1661 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1662 {
1663 u4_deblk_prev_row = 1;
1664
1665 /* checking whether the top rows are deblocked */
1666 for (col = 0; col < i4_n_mb_process_count; col++)
1667 {
1668 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1669 }
1670
1671 /* checking whether the top right MB is deblocked */
1672 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1673 {
1674 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1675 }
1676
1677 /* Top or Top right MBs not deblocked */
1678 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1679 {
1680 return IH264E_SUCCESS;
1681 }
1682
1683 for (row = 0; row < i4_n_mb_process_count; row++)
1684 {
1685 ih264e_deblock_mb(ps_proc, ps_deblk);
1686
1687 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1688
1689 if (ps_deblk->i4_mb_y > 0)
1690 {
1691 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1692 {
1693 /* padding left luma */
1694 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1695
1696 /* padding left chroma */
1697 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1698 }
1699
1700 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1701 {
1702 /* padding right luma */
1703 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1704
1705 /* padding right chroma */
1706 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1707 }
1708 }
1709 ps_deblk->i4_mb_x++;
1710
1711 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1712 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1713
1714 }
1715 }
1716 else if(i4_mb_y > 0)
1717 {
1718 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1719
1720 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1721 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1722 }
1723
1724 if (i4_mb_y == 2)
1725 {
1726 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1727 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1728
1729 if (ps_n_mb_ctxt->i4_mb_x == 0)
1730 {
1731 u4_pad_wd += PAD_LEFT;
1732 u4_pad_top = -PAD_LEFT;
1733 }
1734
1735 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1736 {
1737 u4_pad_wd += PAD_RIGHT;
1738 }
1739
1740 /* padding top luma */
1741 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1742
1743 /* padding top chroma */
1744 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1745 }
1746
1747 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1748
1749 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1750 {
1751 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1752 {
1753 /* Bottom Padding is done in one stretch for the entire width */
1754 if (ps_proc->u4_disable_deblock_level != 1)
1755 {
1756 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1757
1758 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1759
1760 ps_n_mb_ctxt->i4_mb_x = 0;
1761 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1762 ps_deblk->i4_mb_x = 0;
1763 ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1764
1765 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1766 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1767
1768 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1769
1770 j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1771
1772 for (i = 0; i < j; i++)
1773 {
1774 for (col = 0; col < i4_n_mbs; col++)
1775 {
1776 ih264e_deblock_mb(ps_proc, ps_deblk);
1777
1778 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1779
1780 ps_deblk->i4_mb_x++;
1781 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1782 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1783 ps_n_mb_ctxt->i4_mb_x++;
1784 }
1785 }
1786
1787 for (col = 0; col < i4_n_mb_process_count; col++)
1788 {
1789 ih264e_deblock_mb(ps_proc, ps_deblk);
1790
1791 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1792
1793 ps_deblk->i4_mb_x++;
1794 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1795 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1796 ps_n_mb_ctxt->i4_mb_x++;
1797 }
1798
1799 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1800
1801 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1802
1803 /* padding left luma */
1804 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1805
1806 /* padding left chroma */
1807 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1808
1809 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1810 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1811
1812 /* padding left luma */
1813 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1814
1815 /* padding left chroma */
1816 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1817
1818 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1819
1820 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1821
1822 /* padding right luma */
1823 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1824
1825 /* padding right chroma */
1826 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1827
1828 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1829 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1830
1831 /* padding right luma */
1832 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1833
1834 /* padding right chroma */
1835 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1836
1837 }
1838
1839 /* In case height is less than 2 MBs pad top */
1840 if (ps_proc->i4_ht_mbs <= 2)
1841 {
1842 UWORD8 *pu1_pad_top_src;
1843 /* padding top luma */
1844 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1845 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1846
1847 /* padding top chroma */
1848 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1849 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1850 }
1851
1852 /* padding bottom luma */
1853 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1854 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1855
1856 /* padding bottom chroma */
1857 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1858 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1859 }
1860 }
1861 }
1862 }
1863
1864 return IH264E_SUCCESS;
1865 }
1866
1867
1868 /**
1869 *******************************************************************************
1870 *
1871 * @brief This function performs luma & chroma core coding for a set of mb's.
1872 *
1873 * @par Description:
1874 * The mb to be coded is taken and is evaluated over a predefined set of modes
1875 * (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1876 * is selected and using intra/inter prediction filters, prediction is carried out.
1877 * The deviation between src and pred signal constitutes error signal. This error
1878 * signal is transformed (hierarchical transform if necessary) and quantized. The
1879 * quantized residue is packed in to entropy buffer for entropy coding. This is
1880 * repeated for all the mb's enlisted under the job.
1881 *
1882 * @param[in] ps_proc
1883 * Process context corresponding to the job
1884 *
1885 * @returns error status
1886 *
1887 * @remarks none
1888 *
1889 *******************************************************************************
1890 */
ih264e_process(process_ctxt_t * ps_proc)1891 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1892 {
1893 /* error status */
1894 WORD32 error_status = IH264_SUCCESS;
1895
1896 /* codec context */
1897 codec_t *ps_codec = ps_proc->ps_codec;
1898
1899 /* cbp luma, chroma */
1900 UWORD32 u4_cbp_l, u4_cbp_c;
1901
1902 /* width in mbs */
1903 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1904
1905 /* loop var */
1906 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1907
1908 /* valid modes */
1909 UWORD32 u4_valid_modes = 0;
1910
1911 /* gate threshold */
1912 WORD32 i4_gate_threshold = 0;
1913
1914 /* is intra */
1915 WORD32 luma_idx, chroma_idx, is_intra;
1916
1917 /* temp variables */
1918 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1919
1920 /*
1921 * list of modes for evaluation
1922 * -------------------------------------------------------------------------
1923 * Note on enabling I4x4 and I16x16
1924 * At very low QP's the hadamard transform in I16x16 will push up the maximum
1925 * coeff value very high. CAVLC may not be able to represent the value and
1926 * hence the stream may not be decodable in some clips.
1927 * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
1928 */
1929 if (ps_proc->i4_slice_type == ISLICE)
1930 {
1931 if (ps_proc->u4_frame_qp > 10)
1932 {
1933 /* enable intra 16x16 */
1934 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1935
1936 /* enable intra 8x8 */
1937 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1938 }
1939
1940 /* enable intra 4x4 */
1941 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1942 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1943
1944 }
1945 else if (ps_proc->i4_slice_type == PSLICE)
1946 {
1947 if (ps_proc->u4_frame_qp > 10)
1948 {
1949 /* enable intra 16x16 */
1950 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1951 }
1952
1953 /* enable intra 4x4 */
1954 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1955 {
1956 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1957 }
1958 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1959
1960 /* enable inter P16x16 */
1961 u4_valid_modes |= (1 << P16x16);
1962 }
1963 else if (ps_proc->i4_slice_type == BSLICE)
1964 {
1965 if (ps_proc->u4_frame_qp > 10)
1966 {
1967 /* enable intra 16x16 */
1968 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1969 }
1970
1971 /* enable intra 4x4 */
1972 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1973 {
1974 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1975 }
1976 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1977
1978 /* enable inter B16x16 */
1979 u4_valid_modes |= (1 << B16x16);
1980 }
1981
1982
1983 /* init entropy */
1984 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1985 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
1986 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1987
1988 /* compute recon when :
1989 * 1. current frame is to be used as a reference
1990 * 2. dump recon for bit stream sanity check
1991 */
1992 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
1993 ps_codec->s_cfg.u4_enable_recon;
1994
1995 /* Encode 'n' macroblocks,
1996 * 'n' being the number of mbs dictated by current proc ctxt */
1997 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
1998 {
1999 /* since we have not yet found sad, we have not yet got min sad */
2000 /* we need to initialize these variables for each MB */
2001 /* TODO how to get the min sad into the codec */
2002 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2003 ps_proc->u4_min_sad_reached = 0;
2004
2005 /* mb analysis */
2006 {
2007 /* temp var */
2008 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2009
2010 /* force intra refresh ? */
2011 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2012 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2013
2014 /* evaluate inter 16x16 modes */
2015 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2016 {
2017 /* compute nmb me */
2018 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2019 {
2020 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2021 i4_wd_mbs - ps_proc->i4_mb_x));
2022 }
2023
2024 /* set pointers to ME data appropriately for other modules to use */
2025 {
2026 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2027
2028 /* get the min sad condition for current mb */
2029 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2030 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2031
2032 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2033 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2034 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2035
2036 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2037 ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2038 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2039 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2040 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2041
2042 /* get the best sub pel buffer */
2043 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2044 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2045 }
2046 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2047 }
2048 else
2049 {
2050 /* Derive neighbor availability for the current macroblock */
2051 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2052
2053 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2054 }
2055
2056 /*
2057 * If air says intra, we need to force the following code path to evaluate intra
2058 * The easy way is just to say that the inter cost is too much
2059 */
2060 if (!i4_air_enable_inter)
2061 {
2062 ps_proc->u4_min_sad_reached = 0;
2063 ps_proc->i4_mb_cost = INT_MAX;
2064 ps_proc->i4_mb_distortion = INT_MAX;
2065 }
2066 else if (ps_proc->u4_mb_type == PSKIP)
2067 {
2068 goto UPDATE_MB_INFO;
2069 }
2070
2071 /* wait until the proc of [top + 1] mb is computed.
2072 * We wait till the proc dependencies are satisfied */
2073 if(ps_proc->i4_mb_y > 0)
2074 {
2075 /* proc map */
2076 UWORD8 *pu1_proc_map_top;
2077
2078 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2079
2080 while (1)
2081 {
2082 volatile UWORD8 *pu1_buf;
2083 WORD32 idx = i4_mb_idx + 1;
2084
2085 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2086 pu1_buf = pu1_proc_map_top + idx;
2087 if(*pu1_buf)
2088 break;
2089 ithread_yield();
2090 }
2091 }
2092
2093 /* If we already have the minimum sad, there is no point in searching for sad again */
2094 if (ps_proc->u4_min_sad_reached == 0)
2095 {
2096 /* intra gating in inter slices */
2097 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2098 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2099 {
2100 /* distortion of neighboring blocks */
2101 WORD32 i4_distortion[4];
2102
2103 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2104
2105 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2106
2107 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2108
2109 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2110
2111 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2112
2113 }
2114
2115
2116 /* If we are going to force intra we need to evaluate intra irrespective of gating */
2117 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2118 {
2119 /* evaluate intra 4x4 modes */
2120 if (u4_valid_modes & (1 << I4x4))
2121 {
2122 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2123 {
2124 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2125 }
2126 else
2127 {
2128 ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2129 }
2130 }
2131
2132 /* evaluate intra 16x16 modes */
2133 if (u4_valid_modes & (1 << I16x16))
2134 {
2135 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2136 }
2137
2138 /* evaluate intra 8x8 modes */
2139 if (u4_valid_modes & (1 << I8x8))
2140 {
2141 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2142 }
2143
2144 }
2145 }
2146 }
2147
2148 /* is intra */
2149 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2150 {
2151 luma_idx = ps_proc->u4_mb_type;
2152 chroma_idx = 0;
2153 is_intra = 1;
2154
2155 /* evaluate chroma blocks for intra */
2156 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2157 }
2158 else
2159 {
2160 luma_idx = 3;
2161 chroma_idx = 1;
2162 is_intra = 0;
2163 }
2164 ps_proc->u4_is_intra = is_intra;
2165 ps_proc->ps_pu->b1_intra_flag = is_intra;
2166
2167 /* redo MV pred of neighbors in the case intra mb */
2168 /* TODO : currently called unconditionally, needs to be called only in the case of intra
2169 * to modify neighbors */
2170 if (ps_proc->i4_slice_type != ISLICE)
2171 {
2172 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2173 }
2174
2175 /* Perform luma mb core coding */
2176 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2177
2178 /* Perform luma mb core coding */
2179 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2180
2181 /* coded block pattern */
2182 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2183
2184 if (!ps_proc->u4_is_intra)
2185 {
2186 if (ps_proc->i4_slice_type == BSLICE)
2187 {
2188 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2189 {
2190 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2191 }
2192 }
2193 else if(!ps_proc->u4_cbp)
2194 {
2195 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2196 {
2197 ps_proc->u4_mb_type = PSKIP;
2198 }
2199 }
2200 }
2201
2202 UPDATE_MB_INFO:
2203
2204 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2205 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2206
2207 /**********************************************************************/
2208 /* if disable deblock level is '0' this implies enable deblocking for */
2209 /* all edges of all macroblocks with out any restrictions */
2210 /* */
2211 /* if disable deblock level is '1' this implies disable deblocking for*/
2212 /* all edges of all macroblocks with out any restrictions */
2213 /* */
2214 /* if disable deblock level is '2' this implies enable deblocking for */
2215 /* all edges of all macroblocks except edges overlapping with slice */
2216 /* boundaries. This option is not currently supported by the encoder */
2217 /* hence the slice map should be of no significance to perform debloc */
2218 /* king */
2219 /**********************************************************************/
2220
2221 if (ps_proc->u4_compute_recon)
2222 {
2223 /* deblk context */
2224 /* src pointers */
2225 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2226 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2227
2228 /* src indices */
2229 UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2230 UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2231
2232 /* compute blocking strength */
2233 if (ps_proc->u4_disable_deblock_level != 1)
2234 {
2235 ih264e_compute_bs(ps_proc);
2236 }
2237
2238 /* nmb deblocking and hpel and padding */
2239 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2240 pu1_cur_pic_chroma, i4_mb_x,
2241 i4_mb_y);
2242 }
2243
2244 /* update the context after for coding next mb */
2245 error_status |= ih264e_update_proc_ctxt(ps_proc);
2246
2247 /* Once the last row is processed, mark the buffer status appropriately */
2248 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2249 {
2250 /* Pointer to current picture buffer structure */
2251 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2252
2253 /* Pointer to current picture's mv buffer structure */
2254 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2255
2256 /**********************************************************************/
2257 /* if disable deblock level is '0' this implies enable deblocking for */
2258 /* all edges of all macroblocks with out any restrictions */
2259 /* */
2260 /* if disable deblock level is '1' this implies disable deblocking for*/
2261 /* all edges of all macroblocks with out any restrictions */
2262 /* */
2263 /* if disable deblock level is '2' this implies enable deblocking for */
2264 /* all edges of all macroblocks except edges overlapping with slice */
2265 /* boundaries. This option is not currently supported by the encoder */
2266 /* hence the slice map should be of no significance to perform debloc */
2267 /* king */
2268 /**********************************************************************/
2269 error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2270
2271 error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2272
2273 if (ps_codec->s_cfg.u4_enable_recon)
2274 {
2275 /* pic cnt */
2276 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2277
2278 /* rec buffers */
2279 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic;
2280
2281 /* is last? */
2282 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2283
2284 /* frame time stamp */
2285 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2286 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2287 }
2288
2289 }
2290 }
2291
2292 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2293
2294 return error_status;
2295 }
2296
/**
*******************************************************************************
*
* @brief
*  Function to update rc context after encoding
*
* @par   Description
*  This function updates the rate control context after the frame is encoded.
*  Number of bits consumed by the current frame, frame distortion, frame cost,
*  number of intra/inter mb's, ... are passed on to rate control context for
*  updating the rc model.
*
* @param[in] ps_codec
*  Handle to codec context
*
* @param[in] ctxt_sel
*  frame context selector; a non-zero value selects the second half of the
*  process context array
*
* @param[in] i4_is_first_frm
*  passed on unchanged to ih264e_rc_post_enc(); presumably non-zero when the
*  frame is the first frame of the sequence (to be confirmed with the caller)
*
* @returns i4_stuffing_byte
*  number of stuffing bytes (if necessary)
*
* @remarks none
*
*******************************************************************************
*/
WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
{
    /* proc set base idx : second half of the proc contexts when ctxt_sel != 0 */
    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;

    /* first proc ctxt of the selected set */
    process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];

    /* frame qp (deliberately narrowed to 8 bit) */
    UWORD8 u1_frame_qp = (UWORD8)ps_codec->u4_frame_qp;

    /* cbr rc return status */
    WORD32 i4_stuffing_byte = 0;

    /* current frame stats */
    frame_info_t s_frame_info;
    picture_type_e rc_pic_type;

    /* temp var */
    WORD32 i, j;

    /********************************************************************/
    /*                            BEGIN INIT                            */
    /********************************************************************/

    /* init frame info */
    irc_init_frame_info(&s_frame_info);

    /* get frame info */
    for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
    {
        /*****************************************************************/
        /* One frame can be encoded by max of u4_num_cores threads       */
        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
        /* u4_num_cores threads                                          */
        /*****************************************************************/
        for (j = 0; j < MAX_MB_TYPE; j++)
        {
            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];

            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];

            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
        }

        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;

        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;

        /*****************************************************************/
        /* gather number of residue and header bits consumed by the frame*/
        /*****************************************************************/
        ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
    }

    /* map the codec picture type on to the rc picture type */
    switch (ps_codec->pic_type)
    {
        case PIC_I:
        case PIC_IDR:
            rc_pic_type = I_PIC;
            break;
        case PIC_P:
            rc_pic_type = P_PIC;
            break;
        case PIC_B:
            rc_pic_type = B_PIC;
            break;
        default:
            assert(0);
            /* With NDEBUG the assert vanishes; do not hand an indeterminate
             * value to the rc library. I_PIC is an arbitrary but deterministic
             * fallback for this (unexpected) case */
            rc_pic_type = I_PIC;
            break;
    }

    /* update rc lib with current frame stats */
    i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
                                          &(s_frame_info),
                                          ps_codec->s_rate_control.pps_pd_frm_rate,
                                          ps_codec->s_rate_control.pps_time_stamp,
                                          ps_codec->s_rate_control.pps_frame_time,
                                          (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
                                          &rc_pic_type,
                                          i4_is_first_frm,
                                          &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
                                          u1_frame_qp,
                                          &ps_codec->s_rate_control.num_intra_in_prev_frame,
                                          &ps_codec->s_rate_control.i4_avg_activity);
    return i4_stuffing_byte;
}
2413
/**
*******************************************************************************
*
* @brief
*  entry point of a spawned encoder thread
*
* @par Description:
*  The encoder thread dequeues a proc/entropy job from the encoder queue and
*  calls necessary routines.
*
* @param[in] pv_proc
*  Process context corresponding to the thread
*
* @returns  status of the last job queue dequeue attempt. The error status
*  accumulated over all the jobs handled by the thread is stored in
*  i4_error_code of the process context that was used last.
*
* @remarks none
*
*******************************************************************************
*/
ih264e_process_thread(void * pv_proc)2433 WORD32 ih264e_process_thread(void *pv_proc)
2434 {
2435 /* error status */
2436 IH264_ERROR_T ret = IH264_SUCCESS;
2437 WORD32 error_status = IH264_SUCCESS;
2438
2439 /* proc ctxt */
2440 process_ctxt_t *ps_proc = pv_proc;
2441
2442 /* codec ctxt */
2443 codec_t *ps_codec = ps_proc->ps_codec;
2444
2445 /* structure to represent a processing job entry */
2446 job_t s_job;
2447
2448 /* blocking call : entropy dequeue is non-blocking till all
2449 * the proc jobs are processed */
2450 WORD32 is_blocking = 0;
2451
2452 /* set affinity */
2453 ithread_set_affinity(ps_proc->i4_id);
2454
2455 while(1)
2456 {
2457 /* dequeue a job from the entropy queue */
2458 {
2459 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2460
2461 /* codec context selector */
2462 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2463
2464 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2465
2466 /* have the lock */
2467 if (error == 0)
2468 {
2469 if (*pu4_buf == 0)
2470 {
2471 /* no entropy threads are active, try dequeuing a job from the entropy queue */
2472 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2473 if (IH264_SUCCESS == ret)
2474 {
2475 *pu4_buf = 1;
2476 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2477 goto WORKER;
2478 }
2479 else if(is_blocking)
2480 {
2481 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2482 break;
2483 }
2484 }
2485 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2486 }
2487 }
2488
2489 /* dequeue a job from the process queue */
2490 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2491 if (IH264_SUCCESS != ret)
2492 {
2493 if(ps_proc->i4_id)
2494 break;
2495 else
2496 {
2497 is_blocking = 1;
2498 continue;
2499 }
2500 }
2501
2502 WORKER:
2503 /* choose appropriate proc context based on proc_base_idx */
2504 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2505
2506 switch (s_job.i4_cmd)
2507 {
2508 case CMD_PROCESS:
2509 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2510 ps_proc->i4_mb_x = s_job.i2_mb_x;
2511 ps_proc->i4_mb_y = s_job.i2_mb_y;
2512
2513 /* init process context */
2514 ih264e_init_proc_ctxt(ps_proc);
2515
2516 /* core code all mbs enlisted under the current job */
2517 error_status |= ih264e_process(ps_proc);
2518 break;
2519
2520 case CMD_ENTROPY:
2521 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2522 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2523 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2524
2525 /* init entropy */
2526 ih264e_init_entropy_ctxt(ps_proc);
2527
2528 /* entropy code all mbs enlisted under the current job */
2529 error_status |= ih264e_entropy(ps_proc);
2530 break;
2531
2532 default:
2533 error_status |= IH264_FAIL;
2534 break;
2535 }
2536 }
2537
2538 /* send error code */
2539 ps_proc->i4_error_code = error_status;
2540 return ret;
2541 }
2542