1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_process.c
25 *
26 * @brief
27 * Contains functions for codec thread
28 *
29 * @author
30 * Harish
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps()
34 * - ih264e_init_entropy_ctxt()
35 * - ih264e_entropy()
36 * - ih264e_pack_header_data()
37 * - ih264e_update_proc_ctxt()
38 * - ih264e_init_proc_ctxt()
39 * - ih264e_pad_recon_buffer()
40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
41 * - ih264e_process()
42 * - ih264e_set_rc_pic_params()
43 * - ih264e_update_rc_post_enc()
44 * - ih264e_process_thread()
45 *
46 * @remarks
47 * None
48 *
49 *******************************************************************************
50 */
51
52 /*****************************************************************************/
53 /* File Includes */
54 /*****************************************************************************/
55
56 /* System include files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <limits.h>
62 #include <assert.h>
63
64 /* User include files */
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ih264_defs.h"
69 #include "ih264_debug.h"
70 #include "ime_distortion_metrics.h"
71 #include "ime_defs.h"
72 #include "ime_structs.h"
73 #include "ih264_error.h"
74 #include "ih264_structs.h"
75 #include "ih264_trans_quant_itrans_iquant.h"
76 #include "ih264_inter_pred_filters.h"
77 #include "ih264_mem_fns.h"
78 #include "ih264_padding.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_cabac_tables.h"
82 #include "ih264_platform_macros.h"
83 #include "ih264_macros.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264e_error.h"
86 #include "ih264e_bitstream.h"
87 #include "ih264_common_tables.h"
88 #include "ih264_list.h"
89 #include "ih264e_defs.h"
90 #include "irc_cntrl_param.h"
91 #include "irc_frame_info_collector.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_cabac.h"
96 #include "ih264e_process.h"
97 #include "ithread.h"
98 #include "ih264e_intra_modes_eval.h"
99 #include "ih264e_encode_header.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_config.h"
102 #include "ih264e_trace.h"
103 #include "ih264e_statistics.h"
104 #include "ih264_cavlc_tables.h"
105 #include "ih264e_cavlc.h"
106 #include "ih264e_deblk.h"
107 #include "ih264e_me.h"
108 #include "ih264e_debug.h"
109 #include "ih264e_master.h"
110 #include "ih264e_utils.h"
111 #include "irc_mem_req_and_acq.h"
112 #include "irc_rate_control_api.h"
113 #include "ih264e_platform_macros.h"
114 #include "ime_statistics.h"
115
116
117 /*****************************************************************************/
118 /* Function Definitions */
119 /*****************************************************************************/
120
121 /**
122 ******************************************************************************
123 *
124 * @brief This function generates sps, pps set on request
125 *
126 * @par Description
127 * When the encoder is set in header generation mode, the following function
128 * is called. This generates sps and pps headers and returns the control back
129 * to caller.
130 *
131 * @param[in] ps_codec
132 * pointer to codec context
133 *
134 * @return success or failure error code
135 *
136 ******************************************************************************
137 */
ih264e_generate_sps_pps(codec_t * ps_codec)138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139 {
140 /* choose between ping-pong process buffer set */
141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142
143 /* entropy ctxt */
144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145
146 /* Bitstream structure */
147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148
149 /* sps */
150 sps_t *ps_sps = NULL;
151
152 /* pps */
153 pps_t *ps_pps = NULL;
154
155 /* output buff */
156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157
158
159 /********************************************************************/
160 /* initialize the bit stream buffer */
161 /********************************************************************/
162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163
164 /********************************************************************/
165 /* BEGIN HEADER GENERATION */
166 /********************************************************************/
167 /*ps_codec->i4_pps_id ++;*/
168 ps_codec->i4_pps_id %= MAX_PPS_CNT;
169
170 /*ps_codec->i4_sps_id ++;*/
171 ps_codec->i4_sps_id %= MAX_SPS_CNT;
172
173 /* populate sps header */
174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175 ih264e_populate_sps(ps_codec, ps_sps);
176
177 /* populate pps header */
178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179 ih264e_populate_pps(ps_codec, ps_pps);
180
181 ps_entropy->i4_error_code = IH264E_SUCCESS;
182
183 /* generate sps */
184 ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
185 &ps_codec->s_cfg.s_vui);
186 if(ps_entropy->i4_error_code != IH264E_SUCCESS)
187 {
188 return ps_entropy->i4_error_code;
189 }
190 /* generate pps */
191 ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
192
193 /* queue output buffer */
194 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
195
196 return ps_entropy->i4_error_code;
197 }
198
199 /**
200 *******************************************************************************
201 *
202 * @brief initialize entropy context.
203 *
204 * @par Description:
205 * Before invoking the call to perform to entropy coding the entropy context
206 * associated with the job needs to be initialized. This involves the start
207 * mb address, end mb address, slice index and the pointer to location at
208 * which the mb residue info and mb header info are packed.
209 *
210 * @param[in] ps_proc
211 * Pointer to the current process context
212 *
213 * @returns error status
214 *
215 * @remarks none
216 *
217 *******************************************************************************
218 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)219 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
220 {
221 /* codec context */
222 codec_t *ps_codec = ps_proc->ps_codec;
223
224 /* entropy ctxt */
225 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
226
227 /* start address */
228 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
229
230 /* end address */
231 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
232
233 /* slice index */
234 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
235
236 /* sof */
237 /* @ start of frame or start of a new slice, set sof flag */
238 if (ps_entropy->i4_mb_start_add == 0)
239 {
240 ps_entropy->i4_sof = 1;
241 }
242
243 if (ps_entropy->i4_mb_x == 0)
244 {
245 /* packed mb coeff data */
246 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
247 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
248
249 /* packed mb header data */
250 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
251 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
252 }
253
254 return IH264E_SUCCESS;
255 }
256
257 /**
258 *******************************************************************************
259 *
260 * @brief entry point for entropy coding
261 *
262 * @par Description
263 * This function calls lower level functions to perform entropy coding for a
264 * group (n rows) of mb's. After encoding 1 row of mb's, the function takes
265 * back the control, updates the ctxt and calls lower level functions again.
266 * This process is repeated till all the rows or group of mb's (which ever is
267 * minimum) are coded
268 *
269 * @param[in] ps_proc
270 * process context
271 *
272 * @returns error status
273 *
274 * @remarks
275 *
276 *******************************************************************************
277 */
278
ih264e_entropy(process_ctxt_t * ps_proc)279 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
280 {
281 /* codec context */
282 codec_t *ps_codec = ps_proc->ps_codec;
283
284 /* entropy context */
285 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
286
287 /* cabac context */
288 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
289
290 /* sps */
291 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
292
293 /* pps */
294 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
295
296 /* slice header */
297 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
298
299 /* slice type */
300 WORD32 i4_slice_type = ps_proc->i4_slice_type;
301
302 /* Bitstream structure */
303 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
304
305 /* output buff */
306 out_buf_t s_out_buf;
307
308 /* sei params */
309 sei_params_t s_sei;
310
311 /* proc map */
312 UWORD8 *pu1_proc_map;
313
314 /* entropy map */
315 UWORD8 *pu1_entropy_map_curr;
316
317 /* proc base idx */
318 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
319
320 /* temp var */
321 WORD32 i4_wd_mbs, i4_ht_mbs;
322 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx, u4_insert_per_idr;
323 WORD32 bitstream_start_offset, bitstream_end_offset;
324 /********************************************************************/
325 /* BEGIN INIT */
326 /********************************************************************/
327
328 /* entropy encode start address */
329 u4_mb_idx = ps_entropy->i4_mb_start_add;
330
331 /* entropy encode end address */
332 u4_mb_end_idx = ps_entropy->i4_mb_end_add;
333
334 /* width in mbs */
335 i4_wd_mbs = ps_entropy->i4_wd_mbs;
336
337 /* height in mbs */
338 i4_ht_mbs = ps_entropy->i4_ht_mbs;
339
340 /* total mb cnt */
341 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
342
343 /* proc map */
344 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
345
346 /* entropy map */
347 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
348
349 /********************************************************************/
350 /* @ start of frame / slice, */
351 /* initialize the output buffer, */
352 /* initialize the bit stream buffer, */
353 /* check if sps and pps headers have to be generated, */
354 /* populate and generate slice header */
355 /********************************************************************/
356 if (ps_entropy->i4_sof)
357 {
358 /********************************************************************/
359 /* initialize the output buffer */
360 /********************************************************************/
361 s_out_buf = ps_codec->as_out_buf[ctxt_sel];
362
363 /* is last frame to encode */
364 s_out_buf.u4_is_last = ps_entropy->u4_is_last;
365
366 /* frame idx */
367 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
368 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
369
370 /********************************************************************/
371 /* initialize the bit stream buffer */
372 /********************************************************************/
373 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
374
375 /********************************************************************/
376 /* BEGIN HEADER GENERATION */
377 /********************************************************************/
378 if (1 == ps_entropy->i4_gen_header)
379 {
380 /* generate sps */
381 ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
382 &ps_codec->s_cfg.s_vui);
383 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
384 /* generate pps */
385 ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
386 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
387
388 /* reset i4_gen_header */
389 ps_entropy->i4_gen_header = 0;
390 }
391
392 /* populate slice header */
393 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
394
395 /* generate sei */
396 u4_insert_per_idr = (NAL_SLICE_IDR == ps_slice_hdr->i1_nal_unit_type);
397
398 memset(&s_sei, 0, sizeof(sei_params_t));
399 s_sei.u1_sei_mdcv_params_present_flag =
400 ps_codec->s_cfg.s_sei.u1_sei_mdcv_params_present_flag;
401 s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
402 s_sei.u1_sei_cll_params_present_flag =
403 ps_codec->s_cfg.s_sei.u1_sei_cll_params_present_flag;
404 s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params;
405 s_sei.u1_sei_ave_params_present_flag =
406 ps_codec->s_cfg.s_sei.u1_sei_ave_params_present_flag;
407 s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params;
408 s_sei.u1_sei_ccv_params_present_flag = 0;
409 s_sei.s_sei_ccv_params =
410 ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].s_sei_ccv;
411 s_sei.u1_sei_sii_params_present_flag = ps_codec->s_cfg.s_sei.u1_sei_sii_params_present_flag;
412 s_sei.s_sei_sii_params = ps_codec->s_cfg.s_sei.s_sei_sii_params;
413
414 if((1 == ps_sps->i1_vui_parameters_present_flag) &&
415 (1 == ps_codec->s_cfg.s_vui.u1_video_signal_type_present_flag) &&
416 (1 == ps_codec->s_cfg.s_vui.u1_colour_description_present_flag) &&
417 (2 != ps_codec->s_cfg.s_vui.u1_colour_primaries) &&
418 (2 != ps_codec->s_cfg.s_vui.u1_matrix_coefficients) &&
419 (2 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
420 (4 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
421 (5 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics))
422 {
423 s_sei.u1_sei_ccv_params_present_flag =
424 ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag;
425 }
426
427 if((1 == s_sei.u1_sei_mdcv_params_present_flag && u4_insert_per_idr) ||
428 (1 == s_sei.u1_sei_cll_params_present_flag && u4_insert_per_idr) ||
429 (1 == s_sei.u1_sei_ave_params_present_flag && u4_insert_per_idr) ||
430 (1 == s_sei.u1_sei_ccv_params_present_flag) ||
431 (1 == s_sei.u1_sei_sii_params_present_flag))
432 {
433 ps_entropy->i4_error_code =
434 ih264e_generate_sei(ps_bitstrm, &s_sei, u4_insert_per_idr);
435 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
436 }
437 ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag = 0;
438
439 /* generate slice header */
440 ps_entropy->i4_error_code = ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
441 ps_pps, ps_sps);
442 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
443 /* once start of frame / slice is done, you can reset it */
444 /* it is the responsibility of the caller to set this flag */
445 ps_entropy->i4_sof = 0;
446
447 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
448 {
449 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
450 BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
451 ih264e_init_cabac_ctxt(ps_entropy);
452 }
453 }
454
455 /* begin entropy coding for the mb set */
456 while (u4_mb_idx < u4_mb_end_idx)
457 {
458 /* init ptrs/indices */
459 if (ps_entropy->i4_mb_x == i4_wd_mbs)
460 {
461 ps_entropy->i4_mb_y++;
462 ps_entropy->i4_mb_x = 0;
463
464 /* packed mb coeff data */
465 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
466 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
467
468 /* packed mb header data */
469 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
470 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
471
472 /* proc map */
473 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
474
475 /* entropy map */
476 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
477 }
478
479 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
480 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
481 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
482
483 /* wait until the curr mb is core coded */
484 /* The wait for curr mb to be core coded is essential when entropy is launched
485 * as a separate job
486 */
487 while (1)
488 {
489 volatile UWORD8 *pu1_buf1;
490 WORD32 idx = ps_entropy->i4_mb_x;
491
492 pu1_buf1 = pu1_proc_map + idx;
493 if (*pu1_buf1)
494 break;
495 ithread_yield();
496 }
497
498
499 /* write mb layer */
500 ps_entropy->i4_error_code = ps_codec->pf_write_mb_syntax_layer
501 [ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
502 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
503
504 /* Starting bitstream offset for header in bits */
505 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
506
507 /* set entropy map */
508 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
509
510 u4_mb_idx++;
511 ps_entropy->i4_mb_x++;
512 /* check for eof */
513 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
514 {
515 if (ps_entropy->i4_mb_x < i4_wd_mbs)
516 {
517 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
518 }
519 }
520
521 if (ps_entropy->i4_mb_x == i4_wd_mbs)
522 {
523 /* if slices are enabled */
524 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
525 {
526 /* current slice index */
527 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
528
529 /* slice map */
530 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
531
532 /* No need to open a slice at end of frame. The current slice can be closed at the time
533 * of signaling eof flag.
534 */
535 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
536 != pu1_slice_idx[u4_mb_idx]))
537 {
538 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
539 { /* mb skip run */
540 if ((i4_slice_type != ISLICE)
541 && *ps_entropy->pi4_mb_skip_run)
542 {
543 if (*ps_entropy->pi4_mb_skip_run)
544 {
545 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
546 ps_entropy->i4_error_code, "mb skip run");
547 *ps_entropy->pi4_mb_skip_run = 0;
548 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
549 }
550 }
551 /* put rbsp trailing bits for the previous slice */
552 ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
553 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
554 }
555 else
556 {
557 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
558 }
559
560 /* update slice header pointer */
561 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
562 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
563 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
564
565 /* populate slice header */
566 ps_entropy->i4_mb_start_add = u4_mb_idx;
567 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
568 ps_sps);
569
570 /* generate slice header */
571 ps_entropy->i4_error_code = ih264e_generate_slice_header(
572 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
573 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
574 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
575 {
576 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
577 BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
578 ih264e_init_cabac_ctxt(ps_entropy);
579 }
580 }
581 else
582 {
583 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
584 && u4_mb_idx != u4_mb_cnt)
585 {
586 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
587 }
588 }
589 }
590 }
591
592 /* Ending bitstream offset for header in bits */
593 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
594 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
595 bitstream_end_offset - bitstream_start_offset;
596 }
597
598 /* check for eof */
599 if (u4_mb_idx == u4_mb_cnt)
600 {
601 /* set end of frame flag */
602 ps_entropy->i4_eof = 1;
603 }
604 else
605 {
606 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
607 && ps_codec->s_cfg.e_slice_mode
608 != IVE_SLICE_MODE_BLOCKS)
609 {
610 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
611 }
612 }
613
614 if (ps_entropy->i4_eof)
615 {
616 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
617 {
618 /* mb skip run */
619 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
620 {
621 if (*ps_entropy->pi4_mb_skip_run)
622 {
623 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
624 ps_entropy->i4_error_code, "mb skip run");
625 *ps_entropy->pi4_mb_skip_run = 0;
626 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
627 }
628 }
629 /* put rbsp trailing bits */
630 ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
631 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
632 }
633 else
634 {
635 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
636 }
637
638 /* update current frame stats to rc library */
639 {
640 /* number of bytes to stuff */
641 WORD32 i4_stuff_bytes;
642
643 /* update */
644 i4_stuff_bytes = ih264e_update_rc_post_enc(
645 ps_codec, ctxt_sel,
646 (ps_proc->ps_codec->i4_poc == 0));
647
648 /* cbr rc - house keeping */
649 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
650 {
651 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
652 }
653 else if (i4_stuff_bytes)
654 {
655 /* add filler nal units */
656 ps_entropy->i4_error_code = ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
657 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
658 }
659 }
660
661 /*
662 *Frame number is to be incremented only if the current frame is a
663 * reference frame. After each successful frame encode, we increment
664 * frame number by 1
665 */
666 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
667 && ps_codec->u4_is_curr_frm_ref)
668 {
669 ps_codec->i4_frame_num++;
670 }
671 /********************************************************************/
672 /* signal the output */
673 /********************************************************************/
674 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
675 ps_entropy->ps_bitstrm->u4_strm_buf_offset;
676
677 DEBUG("entropy status %x", ps_entropy->i4_error_code);
678 }
679
680 /* Dont execute any further instructions until store synchronization took place */
681 DATA_SYNC();
682
683 /* allow threads to dequeue entropy jobs */
684 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
685
686 return ps_entropy->i4_error_code;
687 }
688
689 /**
690 *******************************************************************************
691 *
692 * @brief Packs header information of a mb in to a buffer
693 *
694 * @par Description:
695 * After the deciding the mode info of a macroblock, the syntax elements
696 * associated with the mb are packed and stored. The entropy thread unpacks
697 * this buffer and generates the end bit stream.
698 *
699 * @param[in] ps_proc
700 * Pointer to the current process context
701 *
702 * @returns error status
703 *
704 * @remarks none
705 *
706 *******************************************************************************
707 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)708 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
709 {
710 /* curr mb type */
711 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
712
713 /* pack mb syntax layer of curr mb (used for entropy coding) */
714 if (u4_mb_type == I4x4)
715 {
716 /* pointer to mb header storage space */
717 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
718 mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
719
720 /* temp var */
721 WORD32 i4, byte;
722
723 /* mb type plus mode */
724 ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
725
726 /* cbp */
727 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
728
729 /* mb qp delta */
730 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
731
732 /* sub mb modes */
733 for (i4 = 0; i4 < 16; i4 ++)
734 {
735 byte = 0;
736
737 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
738 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
739 {
740 byte |= 1;
741 }
742 else
743 {
744
745 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
746 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
747 {
748 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
749 }
750 else
751 {
752 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
753 }
754 }
755
756 i4++;
757
758 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
759 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
760 {
761 byte |= 16;
762 }
763 else
764 {
765
766 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
767 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
768 {
769 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
770 }
771 else
772 {
773 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
774 }
775 }
776
777 ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] = byte;
778 }
779
780 /* end of mb layer */
781 pu1_ptr += sizeof(mb_hdr_i4x4_t);
782 ps_proc->pv_mb_header_data = pu1_ptr;
783 }
784 else if (u4_mb_type == I16x16)
785 {
786 /* pointer to mb header storage space */
787 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
788 mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
789
790 /* mb type plus mode */
791 ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
792
793 /* cbp */
794 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
795
796 /* mb qp delta */
797 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
798
799 /* end of mb layer */
800 pu1_ptr += sizeof(mb_hdr_i16x16_t);
801 ps_proc->pv_mb_header_data = pu1_ptr;
802 }
803 else if (u4_mb_type == P16x16)
804 {
805 /* pointer to mb header storage space */
806 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
807 mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
808
809 /* mb type */
810 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
811
812 /* cbp */
813 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
814
815 /* mb qp delta */
816 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
817
818 ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
819
820 ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
821
822 /* end of mb layer */
823 pu1_ptr += sizeof(mb_hdr_p16x16_t);
824 ps_proc->pv_mb_header_data = pu1_ptr;
825 }
826 else if (u4_mb_type == PSKIP)
827 {
828 /* pointer to mb header storage space */
829 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
830 mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
831
832 /* mb type */
833 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
834
835 /* end of mb layer */
836 pu1_ptr += sizeof(mb_hdr_pskip_t);
837 ps_proc->pv_mb_header_data = pu1_ptr;
838 }
839 else if(u4_mb_type == B16x16)
840 {
841
842 /* pointer to mb header storage space */
843 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
844 mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
845
846 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
847
848 /* mb type plus mode */
849 ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
850
851 /* cbp */
852 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
853
854 /* mb qp delta */
855 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
856
857 /* l0 & l1 me data */
858 if (u4_pred_mode != PRED_L1)
859 {
860 ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
861 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
862
863 ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
864 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
865 }
866 if (u4_pred_mode != PRED_L0)
867 {
868 ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
869 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
870
871 ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
872 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
873 }
874
875 /* end of mb layer */
876 pu1_ptr += sizeof(mb_hdr_b16x16_t);
877 ps_proc->pv_mb_header_data = pu1_ptr;
878
879 }
880 else if(u4_mb_type == BDIRECT)
881 {
882 /* pointer to mb header storage space */
883 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
884 mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
885
886 /* mb type plus mode */
887 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
888
889 /* cbp */
890 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
891
892 /* mb qp delta */
893 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
894
895 /* end of mb layer */
896 pu1_ptr += sizeof(mb_hdr_bdirect_t);
897 ps_proc->pv_mb_header_data = pu1_ptr;
898
899 }
900 else if(u4_mb_type == BSKIP)
901 {
902 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
903
904 /* pointer to mb header storage space */
905 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
906 mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
907
908 /* mb type plus mode */
909 ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
910
911 /* end of mb layer */
912 pu1_ptr += sizeof(mb_hdr_bskip_t);
913 ps_proc->pv_mb_header_data = pu1_ptr;
914 }
915
916 return IH264E_SUCCESS;
917 }
918
919 /**
920 *******************************************************************************
921 *
922 * @brief update process context after encoding an mb. This involves preserving
923 * the current mb information for later use, initialize the proc ctxt elements to
924 * encode next mb.
925 *
926 * @par Description:
927 * This function performs house keeping tasks after encoding an mb.
928 * After encoding an mb, various elements of the process context needs to be
929 * updated to encode the next mb. For instance, the source, recon and reference
930 * pointers, mb indices have to be adjusted to the next mb. The slice index of
931 * the current mb needs to be updated. If mb qp modulation is enabled and the qp
932 * changes, the quant param structure needs to be updated. Also, when encoding
933 * the next mb, the current mb info is used as part of mode prediction or mv
934 * prediction. Hence the current mb info has to be preserved at the top/top left/
935 * left locations.
936 *
937 * @param[in] ps_proc
938 * Pointer to the current process context
939 *
940 * @returns none
941 *
942 * @remarks none
943 *
944 *******************************************************************************
945 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)946 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
947 {
948 /* error status */
949 WORD32 error_status = IH264_SUCCESS;
950
951 /* codec context */
952 codec_t *ps_codec = ps_proc->ps_codec;
953
954 /* curr mb indices */
955 WORD32 i4_mb_x = ps_proc->i4_mb_x;
956 WORD32 i4_mb_y = ps_proc->i4_mb_y;
957
958 /* mb syntax elements of neighbors */
959 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele;
960 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
961 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
962
963 /* curr mb type */
964 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
965
966 /* curr mb type */
967 UWORD32 u4_is_intra = ps_proc->u4_is_intra;
968
969 /* width in mbs */
970 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
971
972 /*height in mbs*/
973 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
974
975 /* proc map */
976 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
977
978 /* deblk context */
979 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
980
981 /* deblk bs context */
982 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
983
984 /* top row motion vector info */
985 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
986
987 /* top left mb motion vector */
988 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
989
990 /* left mb motion vector */
991 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
992
993 /* sub mb modes */
994 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
995
996 /*************************************************************/
997 /* During MV prediction, when top right mb is not available, */
998 /* top left mb info. is used for prediction. Hence the curr */
999 /* top, which will be top left for the next mb needs to be */
1000 /* preserved before updating it with curr mb info. */
1001 /*************************************************************/
1002
1003 /* mb type, mb class, csbp */
1004 *ps_top_left_syn = *ps_top_syn;
1005
1006 if (ps_proc->i4_slice_type != ISLICE)
1007 {
1008 /*****************************************/
1009 /* update top left with top info results */
1010 /*****************************************/
1011 /* mv */
1012 *ps_top_left_mb_pu = *ps_top_row_pu;
1013 }
1014
1015 /*************************************************/
1016 /* update top and left with curr mb info results */
1017 /*************************************************/
1018
1019 /* mb type */
1020 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
1021
1022 /* mb class */
1023 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
1024
1025 /* csbp */
1026 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
1027
1028 /* distortion */
1029 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
1030
1031 if (u4_is_intra)
1032 {
1033 /* mb / sub mb modes */
1034 if (I16x16 == u4_mb_type)
1035 {
1036 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
1037 }
1038 else if (I4x4 == u4_mb_type)
1039 {
1040 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1041 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1042 }
1043 else if (I8x8 == u4_mb_type)
1044 {
1045 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1046 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1047 }
1048
1049 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
1050 {
1051 /* mv */
1052 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1053 }
1054
1055 *ps_proc->pu4_mb_pu_cnt = 1;
1056 }
1057 else
1058 {
1059 /* mv */
1060 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1061 }
1062
1063 /*
1064 * Mark that the MB has been coded intra
1065 * So that future AIRs can skip it
1066 */
1067 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
1068
1069 /**************************************************/
1070 /* pack mb header info. for entropy coding */
1071 /**************************************************/
1072 ih264e_pack_header_data(ps_proc);
1073
1074 /* update previous mb qp */
1075 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1076
1077 /* store qp */
1078 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1079
1080 /*
1081 * We need to sync the cache to make sure that the nmv content of proc
1082 * is updated to cache properly
1083 */
1084 DATA_SYNC();
1085
1086 /* Just before finishing the row, enqueue the job in to entropy queue.
1087 * The master thread depending on its convenience shall dequeue it and
1088 * performs entropy.
1089 *
1090 * WARN !! Placing this block post proc map update can cause queuing of
1091 * entropy jobs in out of order.
1092 */
1093 if (i4_mb_x == i4_wd_mbs - 1)
1094 {
1095 /* job structures */
1096 job_t s_job;
1097
1098 /* job class */
1099 s_job.i4_cmd = CMD_ENTROPY;
1100
1101 /* number of mbs to be processed in the current job */
1102 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1103
1104 /* job start index x */
1105 s_job.i2_mb_x = 0;
1106
1107 /* job start index y */
1108 s_job.i2_mb_y = ps_proc->i4_mb_y;
1109
1110 /* proc base idx */
1111 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1112
1113 /* queue the job */
1114 error_status = ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1115 if(error_status != IH264_SUCCESS)
1116 {
1117 return error_status;
1118 }
1119 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1120 ih264_list_terminate(ps_codec->pv_entropy_jobq);
1121 }
1122
1123 /* update proc map */
1124 pu1_proc_map[i4_mb_x] = 1;
1125
1126 /**************************************************/
1127 /* update proc ctxt elements for encoding next mb */
1128 /**************************************************/
1129 /* update indices */
1130 i4_mb_x ++;
1131 ps_proc->i4_mb_x = i4_mb_x;
1132
1133 if (ps_proc->i4_mb_x == i4_wd_mbs)
1134 {
1135 ps_proc->i4_mb_y++;
1136 ps_proc->i4_mb_x = 0;
1137 }
1138
1139 /* update slice index */
1140 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1141
1142 /* update buffers pointers */
1143 ps_proc->pu1_src_buf_luma += MB_SIZE;
1144 ps_proc->pu1_rec_buf_luma += MB_SIZE;
1145 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1146 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1147
1148 /*
1149 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1150 * the stride per MB is MB_SIZE
1151 */
1152 ps_proc->pu1_src_buf_chroma += MB_SIZE;
1153 ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1154 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1155 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1156
1157
1158
1159 /* Reset cost, distortion params */
1160 ps_proc->i4_mb_cost = INT_MAX;
1161 ps_proc->i4_mb_distortion = SHRT_MAX;
1162
1163 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1164
1165 ps_proc->pu4_mb_pu_cnt += 1;
1166
1167 /* Update colocated pu */
1168 if (ps_proc->i4_slice_type == BSLICE)
1169 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1170
1171 /* deblk ctxts */
1172 if (ps_proc->u4_disable_deblock_level != 1)
1173 {
1174 /* indices */
1175 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1176 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1177
1178 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1179 ps_deblk->i4_mb_x ++;
1180
1181 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1182 /*
1183 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1184 * the stride per MB is MB_SIZE
1185 */
1186 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1187 #endif
1188 }
1189
1190 return error_status;
1191 }
1192
1193 /**
1194 *******************************************************************************
1195 *
1196 * @brief initialize process context.
1197 *
1198 * @par Description:
1199 * Before dispatching the current job to process thread, the process context
1200 * associated with the job is initialized. Usually every job aims to encode one
1201 * row of mb's. Basing on the row indices provided by the job, the process
1202 * context's buffer ptrs, slice indices and other elements that are necessary
1203 * during core-coding are initialized.
1204 *
1205 * @param[in] ps_proc
1206 * Pointer to the current process context
1207 *
1208 * @returns error status
1209 *
1210 * @remarks none
1211 *
1212 *******************************************************************************
1213 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1214 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1215 {
1216 /* codec context */
1217 codec_t *ps_codec = ps_proc->ps_codec;
1218
1219 /* nmb processing context*/
1220 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1221
1222 /* indices */
1223 WORD32 i4_mb_x, i4_mb_y;
1224
1225 /* strides */
1226 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1227 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1228 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1229
1230 /* quant params */
1231 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1232
1233 /* deblk ctxt */
1234 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1235
1236 /* deblk bs context */
1237 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1238
1239 /* Pointer to mv_buffer of current frame */
1240 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1241
1242 /* Pointers for color space conversion */
1243 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1244
1245 /* Pad the MB to support non standard sizes */
1246 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1247 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1248 UWORD16 u2_num_rows = MB_SIZE;
1249 WORD32 convert_uv_only;
1250
1251 /********************************************************************/
1252 /* BEGIN INIT */
1253 /********************************************************************/
1254
1255 i4_mb_x = ps_proc->i4_mb_x;
1256 i4_mb_y = ps_proc->i4_mb_y;
1257
1258 /* Number of mbs processed in one loop of process function */
1259 ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1260 ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1261
1262 /* init buffer pointers */
1263 convert_uv_only = 1;
1264 if (u4_pad_bottom_sz || u4_pad_right_sz ||
1265 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1266 ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1267 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1268 {
1269 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1270 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1271 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1272 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1273 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1274 convert_uv_only = 0;
1275 }
1276 else
1277 {
1278 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1279 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1280 }
1281
1282
1283 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1284 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1285 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1286 ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1287 u4_pad_bottom_sz || u4_pad_right_sz)
1288 {
1289 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1290 (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1291 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1292
1293 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1294 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1295 }
1296 else
1297 {
1298 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1299 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1300 }
1301
1302 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1303 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1304
1305 /* Tempral back and forward reference buffer */
1306 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1307 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1308 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1309 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1310
1311 /*
1312 * Do color space conversion
1313 * NOTE : We assume there that the number of MB's to process will not span multiple rows
1314 */
1315 switch (ps_codec->s_cfg.e_inp_color_fmt)
1316 {
1317 case IV_YUV_420SP_UV:
1318 case IV_YUV_420SP_VU:
1319 /* In case of 420 semi-planar input, copy last few rows to intermediate
1320 buffer as few SIMD functions access upto 16 more bytes.
1321 This data will be padded if required */
1322 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1323 {
1324 WORD32 num_rows = MB_SIZE;
1325 UWORD8 *pu1_src;
1326 UWORD8 *pu1_dst;
1327 WORD32 i;
1328 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1329 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1330
1331 pu1_dst = ps_proc->pu1_src_buf_luma;
1332
1333 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1334 num_rows = MB_SIZE - u4_pad_bottom_sz;
1335 for (i = 0; i < num_rows; i++)
1336 {
1337 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1338 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1339 pu1_dst += ps_proc->i4_src_strd;
1340 }
1341 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1342 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1343 pu1_dst = ps_proc->pu1_src_buf_chroma;
1344
1345 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1346 * due to interleaved input
1347 */
1348 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1349 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1350 else
1351 num_rows = BLK8x8SIZE;
1352 for (i = 0; i < num_rows; i++)
1353 {
1354 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1355 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1356 pu1_dst += ps_proc->i4_src_chroma_strd;
1357 }
1358
1359 }
1360 break;
1361
1362 case IV_YUV_420P :
1363 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1364 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1365
1366 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1367 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1368
1369 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1370 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1371
1372 ps_codec->pf_ih264e_conv_420p_to_420sp(
1373 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1374 ps_proc->pu1_src_buf_luma,
1375 ps_proc->pu1_src_buf_chroma, u2_num_rows,
1376 ps_codec->s_cfg.u4_disp_wd,
1377 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1378 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1379 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1380 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1381 convert_uv_only);
1382 break;
1383
1384 case IV_YUV_422ILE :
1385 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1386 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1387
1388 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1389 ps_proc->pu1_src_buf_luma,
1390 ps_proc->pu1_src_buf_chroma,
1391 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1392 ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1393 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1394 ps_proc->i4_src_chroma_strd,
1395 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1396 break;
1397
1398 default:
1399 break;
1400 }
1401
1402 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1403 {
1404 UWORD32 u4_pad_wd, u4_pad_ht;
1405 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1406 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1407 u4_pad_ht = MB_SIZE;
1408 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1409 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1410
1411 ih264_pad_right_luma(
1412 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1413 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1414
1415 ih264_pad_right_chroma(
1416 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1417 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1418 }
1419
1420 if (ps_proc->i4_mb_y && ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) {
1421 UWORD8 *pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] +
1422 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE) -
1423 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1424 UWORD8 *pu1_dst = ps_proc->pu1_src_buf_luma - ps_proc->i4_src_strd;
1425 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1426 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0)) {
1427 pu1_dst += ps_codec->s_cfg.u4_disp_wd;
1428 memset(pu1_dst, pu1_dst[-1], u4_pad_right_sz);
1429 }
1430 }
1431
1432 /* pad bottom edge */
1433 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1434 {
1435 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1436 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1437
1438 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1439 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1440 }
1441
1442
1443 /* packed mb coeff data */
1444 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1445
1446 /* packed mb header data */
1447 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1448
1449 /* slice index */
1450 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1451
1452 /*********************************************************************/
1453 /* ih264e_init_quant_params() routine is called at the pic init level*/
1454 /* this would have initialized the qp. */
1455 /* TODO_LATER: currently it is assumed that quant params donot change*/
1456 /* across mb's. When they do calculate update ps_qp_params accordingly*/
1457 /*********************************************************************/
1458
1459 /* init mv buffer ptr */
1460 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1461 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1462
1463 /* Init co-located mv buffer */
1464 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1465 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1466
1467 if (i4_mb_y == 0)
1468 {
1469 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1470 }
1471 else
1472 {
1473 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1474 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1475 }
1476
1477 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1478
1479 /* mb type */
1480 ps_proc->u4_mb_type = I16x16;
1481
1482 /* lambda */
1483 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1484
1485 /* mb distortion */
1486 ps_proc->i4_mb_distortion = SHRT_MAX;
1487
1488 if (i4_mb_x == 0)
1489 {
1490 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1491
1492 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1493
1494 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1495
1496 if (i4_mb_y == 0)
1497 {
1498 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1499 }
1500 }
1501
1502 /* mb cost */
1503 ps_proc->i4_mb_cost = INT_MAX;
1504
1505 /**********************/
1506 /* init deblk context */
1507 /**********************/
1508 ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1509 /* deblk lags the current mb proc by 1 row */
1510 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1511 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1512 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1513 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1514
1515 /* buffer ptrs */
1516 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1517 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1518
1519 /* init deblk bs context */
1520 /* mb indices */
1521 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1522 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1523
1524 /* init n_mb_process context */
1525 ps_n_mb_ctxt->i4_mb_x = 0;
1526 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1527 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1528
1529 return IH264E_SUCCESS;
1530 }
1531
1532 /**
1533 *******************************************************************************
1534 *
1535 * @brief This function performs luma & chroma padding
1536 *
1537 * @par Description:
1538 *
1539 * @param[in] ps_proc
1540 * Process context corresponding to the job
1541 *
1542 * @param[in] pu1_curr_pic_luma
1543 * Pointer to luma buffer
1544 *
1545 * @param[in] pu1_curr_pic_chroma
1546 * Pointer to chroma buffer
1547 *
1548 * @param[in] i4_mb_x
1549 * mb index x
1550 *
1551 * @param[in] i4_mb_y
1552 * mb index y
1553 *
1554 * @param[in] i4_pad_ht
1555 * number of rows to be padded
1556 *
1557 * @returns error status
1558 *
1559 * @remarks none
1560 *
1561 *******************************************************************************
1562 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1563 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1564 UWORD8 *pu1_curr_pic_luma,
1565 UWORD8 *pu1_curr_pic_chroma,
1566 WORD32 i4_mb_x,
1567 WORD32 i4_mb_y,
1568 WORD32 i4_pad_ht)
1569 {
1570 /* codec context */
1571 codec_t *ps_codec = ps_proc->ps_codec;
1572
1573 /* strides */
1574 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1575
1576 if (i4_mb_x == 0)
1577 {
1578 /* padding left luma */
1579 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1580
1581 /* padding left chroma */
1582 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1583 }
1584 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1585 {
1586 /* padding right luma */
1587 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1588
1589 /* padding right chroma */
1590 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1591
1592 if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1593 {
1594 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1595 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1596
1597 /* padding bottom luma */
1598 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1599
1600 /* padding bottom chroma */
1601 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1602 }
1603 }
1604
1605 if (i4_mb_y == 0)
1606 {
1607 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1608 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1609 WORD32 wd = MB_SIZE;
1610
1611 if (i4_mb_x == 0)
1612 {
1613 pu1_rec_luma -= PAD_LEFT;
1614 pu1_rec_chroma -= PAD_LEFT;
1615
1616 wd += PAD_LEFT;
1617 }
1618 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1619 {
1620 wd += PAD_RIGHT;
1621 }
1622
1623 /* padding top luma */
1624 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1625
1626 /* padding top chroma */
1627 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1628 }
1629
1630 return IH264E_SUCCESS;
1631 }
1632
1633
1634
1635
1636 /**
1637 *******************************************************************************
1638 *
1639 * @brief This function performs deblocking, padding and halfpel generation for
1640 * 'n' MBs
1641 *
1642 * @par Description:
1643 *
1644 * @param[in] ps_proc
1645 * Process context corresponding to the job
1646 *
1647 * @param[in] pu1_curr_pic_luma
1648 * Current MB being processed(Luma)
1649 *
1650 * @param[in] pu1_curr_pic_chroma
1651 * Current MB being processed(Chroma)
1652 *
1653 * @param[in] i4_mb_x
1654 * Column value of current MB processed
1655 *
1656 * @param[in] i4_mb_y
1657 * Current row processed
1658 *
1659 * @returns error status
1660 *
1661 * @remarks none
1662 *
1663 *******************************************************************************
1664 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1665 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1666 UWORD8 *pu1_curr_pic_luma,
1667 UWORD8 *pu1_curr_pic_chroma,
1668 WORD32 i4_mb_x,
1669 WORD32 i4_mb_y)
1670 {
1671 /* codec context */
1672 codec_t *ps_codec = ps_proc->ps_codec;
1673
1674 /* n_mb processing context */
1675 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1676
1677 /* deblk context */
1678 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1679
1680 /* strides */
1681 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1682
1683 /* loop variables */
1684 WORD32 row, i, j, col;
1685
1686 /* Padding Width */
1687 UWORD32 u4_pad_wd;
1688
1689 /* deblk_map of the row being deblocked */
1690 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1691
1692 /* deblk_map_previous row */
1693 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1694
1695 WORD32 u4_pad_top = 0;
1696
1697 WORD32 u4_deblk_prev_row = 0;
1698
1699 /* Number of mbs to be processed */
1700 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1701
1702 /* Number of mbs actually processed
1703 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1704 WORD32 i4_n_mb_process_count = 0;
1705
1706 UWORD8 *pu1_pad_bottom_src = NULL;
1707
1708 UWORD8 *pu1_pad_src_luma = NULL;
1709 UWORD8 *pu1_pad_src_chroma = NULL;
1710
1711 if (ps_proc->u4_disable_deblock_level == 1)
1712 {
1713 /* If left most MB is processed, then pad left */
1714 if (i4_mb_x == 0)
1715 {
1716 /* padding left luma */
1717 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1718
1719 /* padding left chroma */
1720 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1721 }
1722 /*last col*/
1723 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1724 {
1725 /* padding right luma */
1726 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1727
1728 /* padding right chroma */
1729 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1730 }
1731 }
1732
1733 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1734 {
1735 /* if number of mb's to be processed are less than 'N', go back.
1736 * exception to the above clause is end of row */
1737 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1738 {
1739 return IH264E_SUCCESS;
1740 }
1741 else
1742 {
1743 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1744
1745 /* performing deblocking for required number of MBs */
1746 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1747 {
1748 u4_deblk_prev_row = 1;
1749
1750 /* checking whether the top rows are deblocked */
1751 for (col = 0; col < i4_n_mb_process_count; col++)
1752 {
1753 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1754 }
1755
1756 /* checking whether the top right MB is deblocked */
1757 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1758 {
1759 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1760 }
1761
1762 /* Top or Top right MBs not deblocked */
1763 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1764 {
1765 return IH264E_SUCCESS;
1766 }
1767
1768 for (row = 0; row < i4_n_mb_process_count; row++)
1769 {
1770 ih264e_deblock_mb(ps_proc, ps_deblk);
1771
1772 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1773
1774 if (ps_deblk->i4_mb_y > 0)
1775 {
1776 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1777 {
1778 /* padding left luma */
1779 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1780
1781 /* padding left chroma */
1782 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1783 }
1784
1785 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1786 {
1787 /* padding right luma */
1788 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1789
1790 /* padding right chroma */
1791 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1792 }
1793 }
1794 ps_deblk->i4_mb_x++;
1795
1796 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1797 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1798
1799 }
1800 }
1801 else if(i4_mb_y > 0)
1802 {
1803 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1804
1805 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1806 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1807 }
1808
1809 if (i4_mb_y == 2)
1810 {
1811 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1812 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1813
1814 if (ps_n_mb_ctxt->i4_mb_x == 0)
1815 {
1816 u4_pad_wd += PAD_LEFT;
1817 u4_pad_top = -PAD_LEFT;
1818 }
1819
1820 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1821 {
1822 u4_pad_wd += PAD_RIGHT;
1823 }
1824
1825 /* padding top luma */
1826 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1827
1828 /* padding top chroma */
1829 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1830 }
1831
1832 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1833
1834 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1835 {
1836 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1837 {
1838 /* Bottom Padding is done in one stretch for the entire width */
1839 if (ps_proc->u4_disable_deblock_level != 1)
1840 {
1841 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1842
1843 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1844
1845 ps_n_mb_ctxt->i4_mb_x = 0;
1846 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1847 ps_deblk->i4_mb_x = 0;
1848 ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1849
1850 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1851 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1852
1853 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1854
1855 j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1856
1857 for (i = 0; i < j; i++)
1858 {
1859 for (col = 0; col < i4_n_mbs; col++)
1860 {
1861 ih264e_deblock_mb(ps_proc, ps_deblk);
1862
1863 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1864
1865 ps_deblk->i4_mb_x++;
1866 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1867 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1868 ps_n_mb_ctxt->i4_mb_x++;
1869 }
1870 }
1871
1872 for (col = 0; col < i4_n_mb_process_count; col++)
1873 {
1874 ih264e_deblock_mb(ps_proc, ps_deblk);
1875
1876 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1877
1878 ps_deblk->i4_mb_x++;
1879 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1880 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1881 ps_n_mb_ctxt->i4_mb_x++;
1882 }
1883
1884 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1885
1886 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1887
1888 /* padding left luma */
1889 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1890
1891 /* padding left chroma */
1892 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1893
1894 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1895 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1896
1897 /* padding left luma */
1898 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1899
1900 /* padding left chroma */
1901 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1902
1903 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1904
1905 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1906
1907 /* padding right luma */
1908 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1909
1910 /* padding right chroma */
1911 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1912
1913 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1914 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1915
1916 /* padding right luma */
1917 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1918
1919 /* padding right chroma */
1920 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1921
1922 }
1923
1924 /* In case height is less than 2 MBs pad top */
1925 if (ps_proc->i4_ht_mbs <= 2)
1926 {
1927 UWORD8 *pu1_pad_top_src;
1928 /* padding top luma */
1929 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1930 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1931
1932 /* padding top chroma */
1933 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1934 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1935 }
1936
1937 /* padding bottom luma */
1938 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1939 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1940
1941 /* padding bottom chroma */
1942 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1943 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1944 }
1945 }
1946 }
1947 }
1948
1949 return IH264E_SUCCESS;
1950 }
1951
1952
1953 /**
1954 *******************************************************************************
1955 *
1956 * @brief This function performs luma & chroma core coding for a set of mb's.
1957 *
1958 * @par Description:
1959 * The mb to be coded is taken and is evaluated over a predefined set of modes
1960 * (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1961 * is selected and using intra/inter prediction filters, prediction is carried out.
1962 * The deviation between src and pred signal constitutes error signal. This error
1963 * signal is transformed (hierarchical transform if necessary) and quantized. The
1964 * quantized residue is packed in to entropy buffer for entropy coding. This is
1965 * repeated for all the mb's enlisted under the job.
1966 *
1967 * @param[in] ps_proc
1968 * Process context corresponding to the job
1969 *
1970 * @returns error status
1971 *
1972 * @remarks none
1973 *
1974 *******************************************************************************
1975 */
ih264e_process(process_ctxt_t * ps_proc)1976 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1977 {
1978 /* error status */
1979 WORD32 error_status = IH264_SUCCESS;
1980
1981 /* codec context */
1982 codec_t *ps_codec = ps_proc->ps_codec;
1983
1984 /* cbp luma, chroma */
1985 UWORD32 u4_cbp_l, u4_cbp_c;
1986
1987 /* width in mbs */
1988 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1989
1990 /* loop var */
1991 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1992
1993 /* valid modes */
1994 UWORD32 u4_valid_modes = 0;
1995
1996 /* gate threshold */
1997 WORD32 i4_gate_threshold = 0;
1998
1999 /* is intra */
2000 WORD32 luma_idx, chroma_idx, is_intra;
2001
2002 /* temp variables */
2003 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2004
2005 /*
2006 * list of modes for evaluation
2007 * -------------------------------------------------------------------------
2008 * Note on enabling I4x4 and I16x16
2009 * At very low QP's the hadamard transform in I16x16 will push up the maximum
2010 * coeff value very high. CAVLC may not be able to represent the value and
2011 * hence the stream may not be decodable in some clips.
2012 * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
2013 */
2014 if (ps_proc->i4_slice_type == ISLICE)
2015 {
2016 if (ps_proc->u4_frame_qp > 10)
2017 {
2018 /* enable intra 16x16 */
2019 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2020
2021 /* enable intra 8x8 */
2022 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
2023 }
2024
2025 /* enable intra 4x4 */
2026 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2027 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2028
2029 }
2030 else if (ps_proc->i4_slice_type == PSLICE)
2031 {
2032 if (ps_proc->u4_frame_qp > 10)
2033 {
2034 /* enable intra 16x16 */
2035 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2036 }
2037
2038 /* enable intra 4x4 */
2039 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2040 {
2041 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2042 }
2043 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2044
2045 /* enable inter P16x16 */
2046 u4_valid_modes |= (1 << P16x16);
2047 }
2048 else if (ps_proc->i4_slice_type == BSLICE)
2049 {
2050 if (ps_proc->u4_frame_qp > 10)
2051 {
2052 /* enable intra 16x16 */
2053 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2054 }
2055
2056 /* enable intra 4x4 */
2057 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2058 {
2059 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2060 }
2061 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2062
2063 /* enable inter B16x16 */
2064 u4_valid_modes |= (1 << B16x16);
2065 }
2066
2067
2068 /* init entropy */
2069 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
2070 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
2071 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
2072
2073 /* compute recon when :
2074 * 1. current frame is to be used as a reference
2075 * 2. dump recon for bit stream sanity check
2076 */
2077 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
2078 ps_codec->s_cfg.u4_enable_recon ||
2079 ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR;
2080
2081 /* Encode 'n' macroblocks,
2082 * 'n' being the number of mbs dictated by current proc ctxt */
2083 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
2084 {
2085 /* since we have not yet found sad, we have not yet got min sad */
2086 /* we need to initialize these variables for each MB */
2087 /* TODO how to get the min sad into the codec */
2088 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2089 ps_proc->u4_min_sad_reached = 0;
2090
2091 /* mb analysis */
2092 {
2093 /* temp var */
2094 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2095
2096 /* force intra refresh ? */
2097 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2098 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2099
2100 /* evaluate inter 16x16 modes */
2101 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2102 {
2103 /* compute nmb me */
2104 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2105 {
2106 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2107 i4_wd_mbs - ps_proc->i4_mb_x));
2108 }
2109
2110 /* set pointers to ME data appropriately for other modules to use */
2111 {
2112 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2113
2114 /* get the min sad condition for current mb */
2115 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2116 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2117
2118 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2119 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2120 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2121
2122 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2123 ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2124 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2125 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2126 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2127
2128 /* get the best sub pel buffer */
2129 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2130 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2131 }
2132 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2133 }
2134 else
2135 {
2136 /* Derive neighbor availability for the current macroblock */
2137 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2138
2139 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2140 }
2141
2142 /*
2143 * If air says intra, we need to force the following code path to evaluate intra
2144 * The easy way is just to say that the inter cost is too much
2145 */
2146 if (!i4_air_enable_inter)
2147 {
2148 ps_proc->u4_min_sad_reached = 0;
2149 ps_proc->i4_mb_cost = INT_MAX;
2150 ps_proc->i4_mb_distortion = INT_MAX;
2151 }
2152 else if (ps_proc->u4_mb_type == PSKIP)
2153 {
2154 goto UPDATE_MB_INFO;
2155 }
2156
2157 /* wait until the proc of [top + 1] mb is computed.
2158 * We wait till the proc dependencies are satisfied */
2159 if(ps_proc->i4_mb_y > 0)
2160 {
2161 /* proc map */
2162 UWORD8 *pu1_proc_map_top;
2163
2164 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2165
2166 while (1)
2167 {
2168 volatile UWORD8 *pu1_buf;
2169 WORD32 idx = i4_mb_idx + 1;
2170
2171 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2172 pu1_buf = pu1_proc_map_top + idx;
2173 if(*pu1_buf)
2174 break;
2175 ithread_yield();
2176 }
2177 }
2178
2179 /* If we already have the minimum sad, there is no point in searching for sad again */
2180 if (ps_proc->u4_min_sad_reached == 0 || ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST)
2181 {
2182 /* intra gating in inter slices */
2183 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2184 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2185 {
2186 /* distortion of neighboring blocks */
2187 WORD32 i4_distortion[4];
2188
2189 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2190
2191 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2192
2193 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2194
2195 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2196
2197 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2198
2199 }
2200
2201
2202 /* If we are going to force intra we need to evaluate intra irrespective of gating */
2203 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2204 {
2205 /* evaluate intra 4x4 modes */
2206 if (u4_valid_modes & (1 << I4x4))
2207 {
2208 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2209 {
2210 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2211 }
2212 else
2213 {
2214 ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2215 }
2216 }
2217
2218 /* evaluate intra 16x16 modes */
2219 if (u4_valid_modes & (1 << I16x16))
2220 {
2221 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2222 }
2223
2224 /* evaluate intra 8x8 modes */
2225 if (u4_valid_modes & (1 << I8x8))
2226 {
2227 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2228 }
2229
2230 }
2231 }
2232 }
2233
2234 /* is intra */
2235 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2236 {
2237 luma_idx = ps_proc->u4_mb_type;
2238 chroma_idx = 0;
2239 is_intra = 1;
2240
2241 /* evaluate chroma blocks for intra */
2242 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2243 }
2244 else
2245 {
2246 luma_idx = 3;
2247 chroma_idx = 1;
2248 is_intra = 0;
2249 }
2250 ps_proc->u4_is_intra = is_intra;
2251 ps_proc->ps_pu->b1_intra_flag = is_intra;
2252
2253 /* redo MV pred of neighbors in the case intra mb */
2254 /* TODO : currently called unconditionally, needs to be called only in the case of intra
2255 * to modify neighbors */
2256 if (ps_proc->i4_slice_type != ISLICE)
2257 {
2258 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2259 }
2260
2261 /* Perform luma mb core coding */
2262 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2263
2264 /* Perform luma mb core coding */
2265 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2266
2267 /* coded block pattern */
2268 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2269
2270 if (!ps_proc->u4_is_intra)
2271 {
2272 if (ps_proc->i4_slice_type == BSLICE)
2273 {
2274 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2275 {
2276 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2277 }
2278 }
2279 else if(!ps_proc->u4_cbp)
2280 {
2281 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2282 {
2283 ps_proc->u4_mb_type = PSKIP;
2284 }
2285 }
2286 }
2287
2288 UPDATE_MB_INFO:
2289
2290 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2291 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2292
2293 /**********************************************************************/
2294 /* if disable deblock level is '0' this implies enable deblocking for */
2295 /* all edges of all macroblocks with out any restrictions */
2296 /* */
2297 /* if disable deblock level is '1' this implies disable deblocking for*/
2298 /* all edges of all macroblocks with out any restrictions */
2299 /* */
2300 /* if disable deblock level is '2' this implies enable deblocking for */
2301 /* all edges of all macroblocks except edges overlapping with slice */
2302 /* boundaries. This option is not currently supported by the encoder */
2303 /* hence the slice map should be of no significance to perform debloc */
2304 /* king */
2305 /**********************************************************************/
2306
2307 if (ps_proc->u4_compute_recon)
2308 {
2309 /* deblk context */
2310 /* src pointers */
2311 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2312 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2313
2314 /* src indices */
2315 UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2316 UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2317
2318 /* compute blocking strength */
2319 if (ps_proc->u4_disable_deblock_level != 1)
2320 {
2321 ih264e_compute_bs(ps_proc);
2322 }
2323
2324 /* nmb deblocking and hpel and padding */
2325 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2326 pu1_cur_pic_chroma, i4_mb_x,
2327 i4_mb_y);
2328 }
2329
2330 /* update the context after for coding next mb */
2331 error_status = ih264e_update_proc_ctxt(ps_proc);
2332 if(error_status != IH264E_SUCCESS)
2333 {
2334 return error_status;
2335 }
2336 /* Once the last row is processed, mark the buffer status appropriately */
2337 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2338 {
2339 /* Pointer to current picture buffer structure */
2340 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2341
2342 /* Pointer to current picture's mv buffer structure */
2343 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2344
2345 /**********************************************************************/
2346 /* if disable deblock level is '0' this implies enable deblocking for */
2347 /* all edges of all macroblocks with out any restrictions */
2348 /* */
2349 /* if disable deblock level is '1' this implies disable deblocking for*/
2350 /* all edges of all macroblocks with out any restrictions */
2351 /* */
2352 /* if disable deblock level is '2' this implies enable deblocking for */
2353 /* all edges of all macroblocks except edges overlapping with slice */
2354 /* boundaries. This option is not currently supported by the encoder */
2355 /* hence the slice map should be of no significance to perform debloc */
2356 /* king */
2357 /**********************************************************************/
2358 error_status = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
2359 ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2360 if(error_status != IH264E_SUCCESS)
2361 {
2362 return error_status;
2363 }
2364 error_status = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
2365 ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2366 if(error_status != IH264E_SUCCESS)
2367 {
2368 return error_status;
2369 }
2370 if (ps_codec->s_cfg.u4_enable_recon)
2371 {
2372 /* pic cnt */
2373 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2374
2375 /* rec buffers */
2376 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic;
2377
2378 /* is last? */
2379 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2380
2381 /* frame time stamp */
2382 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2383 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2384 }
2385
2386 }
2387 }
2388
2389 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2390
2391 return error_status;
2392 }
2393
2394 /**
2395 *******************************************************************************
2396 *
2397 * @brief
2398 * Function to update rc context after encoding
2399 *
2400 * @par Description
2401 * This function updates the rate control context after the frame is encoded.
2402 * Number of bits consumed by the current frame, frame distortion, frame cost,
2403 * number of intra/inter mb's, ... are passed on to rate control context for
2404 * updating the rc model.
2405 *
2406 * @param[in] ps_codec
2407 * Handle to codec context
2408 *
2409 * @param[in] ctxt_sel
2410 * frame context selector
2411 *
2412 * @param[in] pic_cnt
2413 * pic count
2414 *
2415 * @returns i4_stuffing_byte
2416 * number of stuffing bytes (if necessary)
2417 *
2418 * @remarks
2419 *
2420 *******************************************************************************
2421 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2422 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2423 {
2424 /* proc set base idx */
2425 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2426
2427 /* proc ctxt */
2428 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2429
2430 /* frame qp */
2431 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2432
2433 /* cbr rc return status */
2434 WORD32 i4_stuffing_byte = 0;
2435
2436 /* current frame stats */
2437 frame_info_t s_frame_info;
2438 picture_type_e rc_pic_type;
2439
2440 /* temp var */
2441 WORD32 i, j;
2442
2443 /********************************************************************/
2444 /* BEGIN INIT */
2445 /********************************************************************/
2446
2447 /* init frame info */
2448 irc_init_frame_info(&s_frame_info);
2449
2450 /* get frame info */
2451 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2452 {
2453 /*****************************************************************/
2454 /* One frame can be encoded by max of u4_num_cores threads */
2455 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */
2456 /* u4_num_cores threads */
2457 /*****************************************************************/
2458 for (j = 0; j< MAX_MB_TYPE; j++)
2459 {
2460 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2461
2462 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2463
2464 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2465 }
2466
2467 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2468
2469 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2470
2471 /*****************************************************************/
2472 /* gather number of residue and header bits consumed by the frame*/
2473 /*****************************************************************/
2474 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2475 }
2476
2477 /* get pic type */
2478 switch (ps_codec->pic_type)
2479 {
2480 case PIC_I:
2481 case PIC_IDR:
2482 rc_pic_type = I_PIC;
2483 break;
2484 case PIC_P:
2485 rc_pic_type = P_PIC;
2486 break;
2487 case PIC_B:
2488 rc_pic_type = B_PIC;
2489 break;
2490 default:
2491 assert(0);
2492 break;
2493 }
2494
2495 /* update rc lib with current frame stats */
2496 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2497 &(s_frame_info),
2498 ps_codec->s_rate_control.pps_pd_frm_rate,
2499 ps_codec->s_rate_control.pps_time_stamp,
2500 ps_codec->s_rate_control.pps_frame_time,
2501 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2502 &rc_pic_type,
2503 i4_is_first_frm,
2504 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2505 u1_frame_qp,
2506 &ps_codec->s_rate_control.num_intra_in_prev_frame,
2507 &ps_codec->s_rate_control.i4_avg_activity);
2508 return i4_stuffing_byte;
2509 }
2510
2511 /**
2512 *******************************************************************************
2513 *
2514 * @brief
2515 * entry point of a spawned encoder thread
2516 *
2517 * @par Description:
2518 * The encoder thread dequeues a proc/entropy job from the encoder queue and
2519 * calls necessary routines.
2520 *
2521 * @param[in] pv_proc
2522 * Process context corresponding to the thread
2523 *
2524 * @returns error status
2525 *
2526 * @remarks
2527 *
2528 *******************************************************************************
2529 */
ih264e_process_thread(void * pv_proc)2530 WORD32 ih264e_process_thread(void *pv_proc)
2531 {
2532 /* error status */
2533 IH264_ERROR_T ret = IH264_SUCCESS;
2534 WORD32 error_status = IH264_SUCCESS;
2535
2536 /* proc ctxt */
2537 process_ctxt_t *ps_proc = pv_proc;
2538
2539 /* codec ctxt */
2540 codec_t *ps_codec = ps_proc->ps_codec;
2541
2542 /* structure to represent a processing job entry */
2543 job_t s_job;
2544
2545 /* blocking call : entropy dequeue is non-blocking till all
2546 * the proc jobs are processed */
2547 WORD32 is_blocking = 0;
2548
2549 /* set affinity */
2550 ithread_set_affinity(ps_proc->i4_id);
2551
2552 ps_proc->i4_error_code = IH264_SUCCESS;
2553 while(1)
2554 {
2555 /* dequeue a job from the entropy queue */
2556 {
2557 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2558
2559 /* codec context selector */
2560 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2561
2562 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2563
2564 /* have the lock */
2565 if (error == 0)
2566 {
2567 if (*pu4_buf == 0)
2568 {
2569 /* no entropy threads are active, try dequeuing a job from the entropy queue */
2570 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2571 if (IH264_SUCCESS == ret)
2572 {
2573 *pu4_buf = 1;
2574 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2575 goto WORKER;
2576 }
2577 else if(is_blocking)
2578 {
2579 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2580 break;
2581 }
2582 }
2583 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2584 }
2585 }
2586
2587 /* dequeue a job from the process queue */
2588 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2589 if (IH264_SUCCESS != ret)
2590 {
2591 if(ps_proc->i4_id)
2592 break;
2593 else
2594 {
2595 is_blocking = 1;
2596 continue;
2597 }
2598 }
2599
2600 WORKER:
2601 /* choose appropriate proc context based on proc_base_idx */
2602 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2603
2604 switch (s_job.i4_cmd)
2605 {
2606 case CMD_PROCESS:
2607 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2608 ps_proc->i4_mb_x = s_job.i2_mb_x;
2609 ps_proc->i4_mb_y = s_job.i2_mb_y;
2610
2611 /* init process context */
2612 ih264e_init_proc_ctxt(ps_proc);
2613
2614 /* core code all mbs enlisted under the current job */
2615 error_status = ih264e_process(ps_proc);
2616 if(error_status !=IH264_SUCCESS)
2617 {
2618 ps_proc->i4_error_code = error_status;
2619 return ret;
2620 }
2621 break;
2622
2623 case CMD_ENTROPY:
2624 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2625 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2626 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2627
2628 /* init entropy */
2629 ih264e_init_entropy_ctxt(ps_proc);
2630
2631 /* entropy code all mbs enlisted under the current job */
2632 error_status = ih264e_entropy(ps_proc);
2633 if(error_status !=IH264_SUCCESS)
2634 {
2635 ps_proc->i4_error_code = error_status;
2636 return ret;
2637 }
2638 break;
2639
2640 default:
2641 ps_proc->i4_error_code = IH264_FAIL;
2642 return ret;
2643 }
2644 }
2645
2646 return ret;
2647 }
2648