• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
20 
/**
*******************************************************************************
* @file
*  ih264e_process.c
*
* @brief
*  Contains functions for codec thread
*
* @author
*  Harish
*
* @par List of Functions:
* - ih264e_generate_sps_pps()
* - ih264e_init_entropy_ctxt()
* - ih264e_entropy()
* - ih264e_pack_header_data()
* - ih264e_update_proc_ctxt()
* - ih264e_init_proc_ctxt()
* - ih264e_pad_recon_buffer()
* - ih264e_dblk_pad_hpel_processing_n_mbs()
* - ih264e_process()
* - ih264e_set_rc_pic_params()
* - ih264e_update_rc_post_enc()
* - ih264e_process_thread()
*
* @remarks
*  None
*
*******************************************************************************
*/
51 
52 /*****************************************************************************/
53 /* File Includes                                                             */
54 /*****************************************************************************/
55 
56 /* System include files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <limits.h>
62 #include <assert.h>
63 
64 /* User include files */
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ih264_defs.h"
69 #include "ih264_debug.h"
70 #include "ime_distortion_metrics.h"
71 #include "ime_defs.h"
72 #include "ime_structs.h"
73 #include "ih264_error.h"
74 #include "ih264_structs.h"
75 #include "ih264_trans_quant_itrans_iquant.h"
76 #include "ih264_inter_pred_filters.h"
77 #include "ih264_mem_fns.h"
78 #include "ih264_padding.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_cabac_tables.h"
82 #include "ih264_platform_macros.h"
83 #include "ih264_macros.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264e_error.h"
86 #include "ih264e_bitstream.h"
87 #include "ih264_common_tables.h"
88 #include "ih264_list.h"
89 #include "ih264e_defs.h"
90 #include "irc_cntrl_param.h"
91 #include "irc_frame_info_collector.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_cabac.h"
96 #include "ih264e_process.h"
97 #include "ithread.h"
98 #include "ih264e_intra_modes_eval.h"
99 #include "ih264e_encode_header.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_config.h"
102 #include "ih264e_trace.h"
103 #include "ih264e_statistics.h"
104 #include "ih264_cavlc_tables.h"
105 #include "ih264e_cavlc.h"
106 #include "ih264e_deblk.h"
107 #include "ih264e_me.h"
108 #include "ih264e_debug.h"
109 #include "ih264e_master.h"
110 #include "ih264e_utils.h"
111 #include "irc_mem_req_and_acq.h"
112 #include "irc_rate_control_api.h"
113 #include "ih264e_platform_macros.h"
114 #include "ime_statistics.h"
115 
116 
117 /*****************************************************************************/
118 /* Function Definitions                                                      */
119 /*****************************************************************************/
120 
121 /**
122 ******************************************************************************
123 *
124 *  @brief This function generates sps, pps set on request
125 *
126 *  @par   Description
127 *  When the encoder is set in header generation mode, the following function
128 *  is called. This generates sps and pps headers and returns the control back
129 *  to caller.
130 *
131 *  @param[in]    ps_codec
132 *  pointer to codec context
133 *
134 *  @return      success or failure error code
135 *
136 ******************************************************************************
137 */
ih264e_generate_sps_pps(codec_t * ps_codec)138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139 {
140     /* choose between ping-pong process buffer set */
141     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142 
143     /* entropy ctxt */
144     entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145 
146     /* Bitstream structure */
147     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148 
149     /* sps */
150     sps_t *ps_sps = NULL;
151 
152     /* pps */
153     pps_t *ps_pps = NULL;
154 
155     /* output buff */
156     out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157 
158 
159     /********************************************************************/
160     /*      initialize the bit stream buffer                            */
161     /********************************************************************/
162     ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163 
164     /********************************************************************/
165     /*                    BEGIN HEADER GENERATION                       */
166     /********************************************************************/
167     /*ps_codec->i4_pps_id ++;*/
168     ps_codec->i4_pps_id %= MAX_PPS_CNT;
169 
170     /*ps_codec->i4_sps_id ++;*/
171     ps_codec->i4_sps_id %= MAX_SPS_CNT;
172 
173     /* populate sps header */
174     ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175     ih264e_populate_sps(ps_codec, ps_sps);
176 
177     /* populate pps header */
178     ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179     ih264e_populate_pps(ps_codec, ps_pps);
180 
181     ps_entropy->i4_error_code = IH264E_SUCCESS;
182 
183     /* generate sps */
184     ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
185                                                      &ps_codec->s_cfg.s_vui);
186     if(ps_entropy->i4_error_code != IH264E_SUCCESS)
187     {
188         return ps_entropy->i4_error_code;
189     }
190     /* generate pps */
191     ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
192 
193     /* queue output buffer */
194     ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
195 
196     return ps_entropy->i4_error_code;
197 }
198 
199 /**
200 *******************************************************************************
201 *
202 * @brief   initialize entropy context.
203 *
204 * @par Description:
205 *  Before invoking the call to perform to entropy coding the entropy context
206 *  associated with the job needs to be initialized. This involves the start
207 *  mb address, end mb address, slice index and the pointer to location at
208 *  which the mb residue info and mb header info are packed.
209 *
210 * @param[in] ps_proc
211 *  Pointer to the current process context
212 *
213 * @returns error status
214 *
215 * @remarks none
216 *
217 *******************************************************************************
218 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)219 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
220 {
221     /* codec context */
222     codec_t *ps_codec = ps_proc->ps_codec;
223 
224     /* entropy ctxt */
225     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
226 
227     /* start address */
228     ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
229 
230     /* end address */
231     ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
232 
233     /* slice index */
234     ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
235 
236     /* sof */
237     /* @ start of frame or start of a new slice, set sof flag */
238     if (ps_entropy->i4_mb_start_add == 0)
239     {
240         ps_entropy->i4_sof = 1;
241     }
242 
243     if (ps_entropy->i4_mb_x == 0)
244     {
245         /* packed mb coeff data */
246         ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
247                         ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
248 
249         /* packed mb header data */
250         ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
251                         ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
252     }
253 
254     return IH264E_SUCCESS;
255 }
256 
257 /**
258 *******************************************************************************
259 *
260 * @brief entry point for entropy coding
261 *
262 * @par Description
263 *  This function calls lower level functions to perform entropy coding for a
264 *  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
265 *  back the control, updates the ctxt and calls lower level functions again.
266 *  This process is repeated till all the rows or group of mb's (which ever is
267 *  minimum) are coded
268 *
269 * @param[in] ps_proc
270 *  process context
271 *
272 * @returns  error status
273 *
274 * @remarks
275 *
276 *******************************************************************************
277 */
278 
ih264e_entropy(process_ctxt_t * ps_proc)279 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
280 {
281     /* codec context */
282     codec_t *ps_codec = ps_proc->ps_codec;
283 
284     /* entropy context */
285     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
286 
287     /* cabac context */
288     cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
289 
290     /* sps */
291     sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
292 
293     /* pps */
294     pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
295 
296     /* slice header */
297     slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
298 
299     /* slice type */
300     WORD32 i4_slice_type = ps_proc->i4_slice_type;
301 
302     /* Bitstream structure */
303     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
304 
305     /* output buff */
306     out_buf_t s_out_buf;
307 
308     /* sei params */
309     sei_params_t s_sei;
310 
311     /* proc map */
312     UWORD8  *pu1_proc_map;
313 
314     /* entropy map */
315     UWORD8  *pu1_entropy_map_curr;
316 
317     /* proc base idx */
318     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
319 
320     /* temp var */
321     WORD32 i4_wd_mbs, i4_ht_mbs;
322     UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx, u4_insert_per_idr;
323     WORD32 bitstream_start_offset, bitstream_end_offset;
324     /********************************************************************/
325     /*                            BEGIN INIT                            */
326     /********************************************************************/
327 
328     /* entropy encode start address */
329     u4_mb_idx = ps_entropy->i4_mb_start_add;
330 
331     /* entropy encode end address */
332     u4_mb_end_idx = ps_entropy->i4_mb_end_add;
333 
334     /* width in mbs */
335     i4_wd_mbs = ps_entropy->i4_wd_mbs;
336 
337     /* height in mbs */
338     i4_ht_mbs = ps_entropy->i4_ht_mbs;
339 
340     /* total mb cnt */
341     u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
342 
343     /* proc map */
344     pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
345 
346     /* entropy map */
347     pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
348 
349     /********************************************************************/
350     /* @ start of frame / slice,                                        */
351     /*      initialize the output buffer,                               */
352     /*      initialize the bit stream buffer,                           */
353     /*      check if sps and pps headers have to be generated,          */
354     /*      populate and generate slice header                          */
355     /********************************************************************/
356     if (ps_entropy->i4_sof)
357     {
358         /********************************************************************/
359         /*      initialize the output buffer                                */
360         /********************************************************************/
361         s_out_buf = ps_codec->as_out_buf[ctxt_sel];
362 
363         /* is last frame to encode */
364         s_out_buf.u4_is_last = ps_entropy->u4_is_last;
365 
366         /* frame idx */
367         s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
368         s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
369 
370         /********************************************************************/
371         /*      initialize the bit stream buffer                            */
372         /********************************************************************/
373         ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
374 
375         /********************************************************************/
376         /*                    BEGIN HEADER GENERATION                       */
377         /********************************************************************/
378         if (1 == ps_entropy->i4_gen_header)
379         {
380             /* generate sps */
381             ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
382                                                              &ps_codec->s_cfg.s_vui);
383             RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
384             /* generate pps */
385             ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
386             RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
387 
388             /* reset i4_gen_header */
389             ps_entropy->i4_gen_header = 0;
390         }
391 
392         /* populate slice header */
393         ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
394 
395         /* generate sei */
396         u4_insert_per_idr = (NAL_SLICE_IDR == ps_slice_hdr->i1_nal_unit_type);
397 
398         memset(&s_sei, 0, sizeof(sei_params_t));
399         s_sei.u1_sei_mdcv_params_present_flag =
400                     ps_codec->s_cfg.s_sei.u1_sei_mdcv_params_present_flag;
401         s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
402         s_sei.u1_sei_cll_params_present_flag =
403                     ps_codec->s_cfg.s_sei.u1_sei_cll_params_present_flag;
404         s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params;
405         s_sei.u1_sei_ave_params_present_flag =
406                     ps_codec->s_cfg.s_sei.u1_sei_ave_params_present_flag;
407         s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params;
408         s_sei.u1_sei_ccv_params_present_flag = 0;
409         s_sei.s_sei_ccv_params =
410                     ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].s_sei_ccv;
411         s_sei.u1_sei_sii_params_present_flag = ps_codec->s_cfg.s_sei.u1_sei_sii_params_present_flag;
412         s_sei.s_sei_sii_params = ps_codec->s_cfg.s_sei.s_sei_sii_params;
413 
414         if((1 == ps_sps->i1_vui_parameters_present_flag) &&
415            (1 == ps_codec->s_cfg.s_vui.u1_video_signal_type_present_flag) &&
416            (1 == ps_codec->s_cfg.s_vui.u1_colour_description_present_flag) &&
417            (2 != ps_codec->s_cfg.s_vui.u1_colour_primaries) &&
418            (2 != ps_codec->s_cfg.s_vui.u1_matrix_coefficients) &&
419            (2 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
420            (4 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
421            (5 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics))
422         {
423             s_sei.u1_sei_ccv_params_present_flag =
424             ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag;
425         }
426 
427         if((1 == s_sei.u1_sei_mdcv_params_present_flag && u4_insert_per_idr) ||
428            (1 == s_sei.u1_sei_cll_params_present_flag && u4_insert_per_idr) ||
429            (1 == s_sei.u1_sei_ave_params_present_flag && u4_insert_per_idr) ||
430            (1 == s_sei.u1_sei_ccv_params_present_flag) ||
431            (1 == s_sei.u1_sei_sii_params_present_flag))
432         {
433             ps_entropy->i4_error_code =
434                     ih264e_generate_sei(ps_bitstrm, &s_sei, u4_insert_per_idr);
435             RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
436         }
437         ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag = 0;
438 
439         /* generate slice header */
440         ps_entropy->i4_error_code = ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
441                                                                   ps_pps, ps_sps);
442         RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
443         /* once start of frame / slice is done, you can reset it */
444         /* it is the responsibility of the caller to set this flag */
445         ps_entropy->i4_sof = 0;
446 
447         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
448         {
449             BITSTREAM_BYTE_ALIGN(ps_bitstrm);
450             BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
451             ih264e_init_cabac_ctxt(ps_entropy);
452         }
453     }
454 
455     /* begin entropy coding for the mb set */
456     while (u4_mb_idx < u4_mb_end_idx)
457     {
458         /* init ptrs/indices */
459         if (ps_entropy->i4_mb_x == i4_wd_mbs)
460         {
461             ps_entropy->i4_mb_y++;
462             ps_entropy->i4_mb_x = 0;
463 
464             /* packed mb coeff data */
465             ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
466                             ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
467 
468             /* packed mb header data */
469             ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
470                             ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
471 
472             /* proc map */
473             pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
474 
475             /* entropy map */
476             pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
477         }
478 
479         DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
480         ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
481         ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
482 
483         /* wait until the curr mb is core coded */
484         /* The wait for curr mb to be core coded is essential when entropy is launched
485          * as a separate job
486          */
487         while (1)
488         {
489             volatile UWORD8 *pu1_buf1;
490             WORD32 idx = ps_entropy->i4_mb_x;
491 
492             pu1_buf1 = pu1_proc_map + idx;
493             if (*pu1_buf1)
494                 break;
495             ithread_yield();
496         }
497 
498 
499         /* write mb layer */
500         ps_entropy->i4_error_code = ps_codec->pf_write_mb_syntax_layer
501                         [ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
502         RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
503 
504         /* Starting bitstream offset for header in bits */
505         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
506 
507         /* set entropy map */
508         pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
509 
510         u4_mb_idx++;
511         ps_entropy->i4_mb_x++;
512         /* check for eof */
513         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
514         {
515             if (ps_entropy->i4_mb_x < i4_wd_mbs)
516             {
517                 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
518             }
519         }
520 
521         if (ps_entropy->i4_mb_x == i4_wd_mbs)
522         {
523             /* if slices are enabled */
524             if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
525             {
526                 /* current slice index */
527                 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
528 
529                 /* slice map */
530                 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
531 
532                 /* No need to open a slice at end of frame. The current slice can be closed at the time
533                  * of signaling eof flag.
534                  */
535                 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
536                                                 != pu1_slice_idx[u4_mb_idx]))
537                 {
538                     if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
539                     { /* mb skip run */
540                         if ((i4_slice_type != ISLICE)
541                                         && *ps_entropy->pi4_mb_skip_run)
542                         {
543                             if (*ps_entropy->pi4_mb_skip_run)
544                             {
545                                 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
546                                             ps_entropy->i4_error_code, "mb skip run");
547                                 *ps_entropy->pi4_mb_skip_run = 0;
548                                 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
549                             }
550                         }
551                         /* put rbsp trailing bits for the previous slice */
552                         ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
553                         RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
554                     }
555                     else
556                     {
557                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
558                     }
559 
560                     /* update slice header pointer */
561                     i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
562                     ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
563                     ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
564 
565                     /* populate slice header */
566                     ps_entropy->i4_mb_start_add = u4_mb_idx;
567                     ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
568                                                  ps_sps);
569 
570                     /* generate slice header */
571                     ps_entropy->i4_error_code = ih264e_generate_slice_header(
572                                     ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
573                     RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
574                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
575                     {
576                         BITSTREAM_BYTE_ALIGN(ps_bitstrm);
577                         BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
578                         ih264e_init_cabac_ctxt(ps_entropy);
579                     }
580                 }
581                 else
582                 {
583                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
584                                     && u4_mb_idx != u4_mb_cnt)
585                     {
586                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
587                     }
588                 }
589             }
590         }
591 
592         /* Ending bitstream offset for header in bits */
593         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
594         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
595                         bitstream_end_offset - bitstream_start_offset;
596     }
597 
598     /* check for eof */
599     if (u4_mb_idx == u4_mb_cnt)
600     {
601         /* set end of frame flag */
602         ps_entropy->i4_eof = 1;
603     }
604     else
605     {
606         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
607                         && ps_codec->s_cfg.e_slice_mode
608                                         != IVE_SLICE_MODE_BLOCKS)
609         {
610             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
611         }
612     }
613 
614     if (ps_entropy->i4_eof)
615     {
616         if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
617         {
618             /* mb skip run */
619             if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
620             {
621                 if (*ps_entropy->pi4_mb_skip_run)
622                 {
623                     PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
624                                  ps_entropy->i4_error_code, "mb skip run");
625                     *ps_entropy->pi4_mb_skip_run = 0;
626                     RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
627                 }
628             }
629             /* put rbsp trailing bits */
630              ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
631              RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
632         }
633         else
634         {
635             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
636         }
637 
638         /* update current frame stats to rc library */
639         {
640             /* number of bytes to stuff */
641             WORD32 i4_stuff_bytes;
642 
643             /* update */
644             i4_stuff_bytes = ih264e_update_rc_post_enc(
645                             ps_codec, ctxt_sel,
646                             (ps_proc->ps_codec->i4_poc == 0));
647 
648             /* cbr rc - house keeping */
649             if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
650             {
651                  ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
652             }
653             else if (i4_stuff_bytes)
654             {
655                 /* add filler nal units */
656                  ps_entropy->i4_error_code = ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
657                  RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
658             }
659         }
660 
661         /*
662          *Frame number is to be incremented only if the current frame is a
663          * reference frame. After each successful frame encode, we increment
664          * frame number by 1
665          */
666         if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
667                         && ps_codec->u4_is_curr_frm_ref)
668         {
669             ps_codec->i4_frame_num++;
670         }
671         /********************************************************************/
672         /*      signal the output                                           */
673         /********************************************************************/
674         ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
675                         ps_entropy->ps_bitstrm->u4_strm_buf_offset;
676 
677         DEBUG("entropy status %x", ps_entropy->i4_error_code);
678     }
679 
680     /* Dont execute any further instructions until store synchronization took place */
681     DATA_SYNC();
682 
683     /* allow threads to dequeue entropy jobs */
684     ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
685 
686     return ps_entropy->i4_error_code;
687 }
688 
/**
*******************************************************************************
*
* @brief Packs header information of a mb into a buffer
*
* @par Description:
*  After deciding the mode info of a macroblock, the syntax elements
*  associated with the mb are packed and stored. The entropy thread unpacks
*  this buffer and generates the final bit stream.
*
* @param[in] ps_proc
*  Pointer to the current process context
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
ih264e_pack_header_data(process_ctxt_t * ps_proc)708 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
709 {
710     /* curr mb type */
711     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
712 
713     /* pack mb syntax layer of curr mb (used for entropy coding) */
714     if (u4_mb_type == I4x4)
715     {
716         /* pointer to mb header storage space */
717         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
718         mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
719 
720         /* temp var */
721         WORD32 i4, byte;
722 
723         /* mb type plus mode */
724         ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
725 
726         /* cbp */
727         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
728 
729         /* mb qp delta */
730         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
731 
732         /* sub mb modes */
733         for (i4 = 0; i4 < 16; i4 ++)
734         {
735             byte = 0;
736 
737             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
738                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
739             {
740                 byte |= 1;
741             }
742             else
743             {
744 
745                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
746                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
747                 {
748                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
749                 }
750                 else
751                 {
752                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
753                 }
754             }
755 
756             i4++;
757 
758             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
759                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
760             {
761                 byte |= 16;
762             }
763             else
764             {
765 
766                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
767                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
768                 {
769                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
770                 }
771                 else
772                 {
773                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
774                 }
775             }
776 
777             ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] =  byte;
778         }
779 
780         /* end of mb layer */
781         pu1_ptr += sizeof(mb_hdr_i4x4_t);
782         ps_proc->pv_mb_header_data = pu1_ptr;
783     }
784     else if (u4_mb_type == I16x16)
785     {
786         /* pointer to mb header storage space */
787         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
788         mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
789 
790         /* mb type plus mode */
791         ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
792 
793         /* cbp */
794         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
795 
796         /* mb qp delta */
797         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
798 
799         /* end of mb layer */
800         pu1_ptr += sizeof(mb_hdr_i16x16_t);
801         ps_proc->pv_mb_header_data = pu1_ptr;
802     }
803     else if (u4_mb_type == P16x16)
804     {
805         /* pointer to mb header storage space */
806         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
807         mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
808 
809         /* mb type */
810         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
811 
812         /* cbp */
813         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
814 
815         /* mb qp delta */
816         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
817 
818         ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
819 
820         ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
821 
822         /* end of mb layer */
823         pu1_ptr += sizeof(mb_hdr_p16x16_t);
824         ps_proc->pv_mb_header_data = pu1_ptr;
825     }
826     else if (u4_mb_type == PSKIP)
827     {
828         /* pointer to mb header storage space */
829         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
830         mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
831 
832         /* mb type */
833         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
834 
835         /* end of mb layer */
836         pu1_ptr += sizeof(mb_hdr_pskip_t);
837         ps_proc->pv_mb_header_data = pu1_ptr;
838     }
839     else if(u4_mb_type == B16x16)
840     {
841 
842         /* pointer to mb header storage space */
843         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
844         mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
845 
846         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
847 
848         /* mb type plus mode */
849         ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
850 
851         /* cbp */
852         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
853 
854         /* mb qp delta */
855         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
856 
857         /* l0 & l1 me data */
858         if (u4_pred_mode != PRED_L1)
859         {
860             ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
861                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
862 
863             ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
864                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
865         }
866         if (u4_pred_mode != PRED_L0)
867         {
868             ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
869                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
870 
871             ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
872                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
873         }
874 
875         /* end of mb layer */
876         pu1_ptr += sizeof(mb_hdr_b16x16_t);
877         ps_proc->pv_mb_header_data = pu1_ptr;
878 
879     }
880     else if(u4_mb_type == BDIRECT)
881     {
882         /* pointer to mb header storage space */
883         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
884         mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
885 
886         /* mb type plus mode */
887         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
888 
889         /* cbp */
890         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
891 
892         /* mb qp delta */
893         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
894 
895         /* end of mb layer */
896         pu1_ptr += sizeof(mb_hdr_bdirect_t);
897         ps_proc->pv_mb_header_data = pu1_ptr;
898 
899     }
900     else if(u4_mb_type == BSKIP)
901     {
902         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
903 
904         /* pointer to mb header storage space */
905         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
906         mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
907 
908         /* mb type plus mode */
909         ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
910 
911         /* end of mb layer */
912         pu1_ptr += sizeof(mb_hdr_bskip_t);
913         ps_proc->pv_mb_header_data = pu1_ptr;
914     }
915 
916     return IH264E_SUCCESS;
917 }
918 
919 /**
920 *******************************************************************************
921 *
922 * @brief   update process context after encoding an mb. This involves preserving
923 * the current mb information for later use, initialize the proc ctxt elements to
924 * encode next mb.
925 *
926 * @par Description:
927 *  This function performs house keeping tasks after encoding an mb.
928 *  After encoding an mb, various elements of the process context needs to be
929 *  updated to encode the next mb. For instance, the source, recon and reference
930 *  pointers, mb indices have to be adjusted to the next mb. The slice index of
931 *  the current mb needs to be updated. If mb qp modulation is enabled, then if
932 *  the qp changes the quant param structure needs to be updated. Also to encoding
933 *  the next mb, the current mb info is used as part of mode prediction or mv
934 *  prediction. Hence the current mb info has to preserved at top/top left/left
935 *  locations.
936 *
937 * @param[in] ps_proc
938 *  Pointer to the current process context
939 *
940 * @returns none
941 *
942 * @remarks none
943 *
944 *******************************************************************************
945 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)946 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
947 {
948     /* error status */
949     WORD32 error_status = IH264_SUCCESS;
950 
951     /* codec context */
952     codec_t *ps_codec = ps_proc->ps_codec;
953 
954     /* curr mb indices */
955     WORD32 i4_mb_x = ps_proc->i4_mb_x;
956     WORD32 i4_mb_y = ps_proc->i4_mb_y;
957 
958     /* mb syntax elements of neighbors */
959     mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
960     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
961     mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
962 
963     /* curr mb type */
964     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
965 
966     /* curr mb type */
967     UWORD32 u4_is_intra = ps_proc->u4_is_intra;
968 
969     /* width in mbs */
970     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
971 
972     /*height in mbs*/
973     WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
974 
975     /* proc map */
976     UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
977 
978     /* deblk context */
979     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
980 
981     /* deblk bs context */
982     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
983 
984     /* top row motion vector info */
985     enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
986 
987     /* top left mb motion vector */
988     enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
989 
990     /* left mb motion vector */
991     enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
992 
993     /* sub mb modes */
994     UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
995 
996     /*************************************************************/
997     /* During MV prediction, when top right mb is not available, */
998     /* top left mb info. is used for prediction. Hence the curr  */
999     /* top, which will be top left for the next mb needs to be   */
1000     /* preserved before updating it with curr mb info.           */
1001     /*************************************************************/
1002 
1003     /* mb type, mb class, csbp */
1004     *ps_top_left_syn = *ps_top_syn;
1005 
1006     if (ps_proc->i4_slice_type != ISLICE)
1007     {
1008         /*****************************************/
1009         /* update top left with top info results */
1010         /*****************************************/
1011         /* mv */
1012         *ps_top_left_mb_pu = *ps_top_row_pu;
1013     }
1014 
1015     /*************************************************/
1016     /* update top and left with curr mb info results */
1017     /*************************************************/
1018 
1019     /* mb type */
1020     ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
1021 
1022     /* mb class */
1023     ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
1024 
1025     /* csbp */
1026     ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
1027 
1028     /* distortion */
1029     ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
1030 
1031     if (u4_is_intra)
1032     {
1033         /* mb / sub mb modes */
1034         if (I16x16 == u4_mb_type)
1035         {
1036             pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
1037         }
1038         else if (I4x4 == u4_mb_type)
1039         {
1040             ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1041             ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1042         }
1043         else if (I8x8 == u4_mb_type)
1044         {
1045             memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1046             memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1047         }
1048 
1049         if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
1050         {
1051             /* mv */
1052             *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1053         }
1054 
1055         *ps_proc->pu4_mb_pu_cnt = 1;
1056     }
1057     else
1058     {
1059         /* mv */
1060         *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1061     }
1062 
1063     /*
1064      * Mark that the MB has been coded intra
1065      * So that future AIRs can skip it
1066      */
1067     ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
1068 
1069     /**************************************************/
1070     /* pack mb header info. for entropy coding        */
1071     /**************************************************/
1072     ih264e_pack_header_data(ps_proc);
1073 
1074     /* update previous mb qp */
1075     ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1076 
1077     /* store qp */
1078     ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1079 
1080     /*
1081      * We need to sync the cache to make sure that the nmv content of proc
1082      * is updated to cache properly
1083      */
1084     DATA_SYNC();
1085 
1086     /* Just before finishing the row, enqueue the job in to entropy queue.
1087      * The master thread depending on its convenience shall dequeue it and
1088      * performs entropy.
1089      *
1090      * WARN !! Placing this block post proc map update can cause queuing of
1091      * entropy jobs in out of order.
1092      */
1093     if (i4_mb_x == i4_wd_mbs - 1)
1094     {
1095         /* job structures */
1096         job_t s_job;
1097 
1098         /* job class */
1099         s_job.i4_cmd = CMD_ENTROPY;
1100 
1101         /* number of mbs to be processed in the current job */
1102         s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1103 
1104         /* job start index x */
1105         s_job.i2_mb_x = 0;
1106 
1107         /* job start index y */
1108         s_job.i2_mb_y = ps_proc->i4_mb_y;
1109 
1110         /* proc base idx */
1111         s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1112 
1113         /* queue the job */
1114         error_status = ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1115         if(error_status != IH264_SUCCESS)
1116         {
1117             return error_status;
1118         }
1119         if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1120             ih264_list_terminate(ps_codec->pv_entropy_jobq);
1121     }
1122 
1123     /* update proc map */
1124     pu1_proc_map[i4_mb_x] = 1;
1125 
1126     /**************************************************/
1127     /* update proc ctxt elements for encoding next mb */
1128     /**************************************************/
1129     /* update indices */
1130     i4_mb_x ++;
1131     ps_proc->i4_mb_x = i4_mb_x;
1132 
1133     if (ps_proc->i4_mb_x == i4_wd_mbs)
1134     {
1135         ps_proc->i4_mb_y++;
1136         ps_proc->i4_mb_x = 0;
1137     }
1138 
1139     /* update slice index */
1140     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1141 
1142     /* update buffers pointers */
1143     ps_proc->pu1_src_buf_luma += MB_SIZE;
1144     ps_proc->pu1_rec_buf_luma += MB_SIZE;
1145     ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1146     ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1147 
1148     /*
1149      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1150      * the stride per MB is MB_SIZE
1151      */
1152     ps_proc->pu1_src_buf_chroma += MB_SIZE;
1153     ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1154     ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1155     ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1156 
1157 
1158 
1159     /* Reset cost, distortion params */
1160     ps_proc->i4_mb_cost = INT_MAX;
1161     ps_proc->i4_mb_distortion = SHRT_MAX;
1162 
1163     ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1164 
1165     ps_proc->pu4_mb_pu_cnt += 1;
1166 
1167     /* Update colocated pu */
1168     if (ps_proc->i4_slice_type == BSLICE)
1169         ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1170 
1171     /* deblk ctxts */
1172     if (ps_proc->u4_disable_deblock_level != 1)
1173     {
1174         /* indices */
1175         ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1176         ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1177 
1178 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1179         ps_deblk->i4_mb_x ++;
1180 
1181         ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1182         /*
1183          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1184          * the stride per MB is MB_SIZE
1185          */
1186         ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1187 #endif
1188     }
1189 
1190     return error_status;
1191 }
1192 
1193 /**
1194 *******************************************************************************
1195 *
1196 * @brief   initialize process context.
1197 *
1198 * @par Description:
1199 *  Before dispatching the current job to process thread, the process context
1200 *  associated with the job is initialized. Usually every job aims to encode one
1201 *  row of mb's. Basing on the row indices provided by the job, the process
1202 *  context's buffer ptrs, slice indices and other elements that are necessary
1203 *  during core-coding are initialized.
1204 *
1205 * @param[in] ps_proc
1206 *  Pointer to the current process context
1207 *
1208 * @returns error status
1209 *
1210 * @remarks none
1211 *
1212 *******************************************************************************
1213 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1214 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1215 {
1216     /* codec context */
1217     codec_t *ps_codec = ps_proc->ps_codec;
1218 
1219     /* nmb processing context*/
1220     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1221 
1222     /* indices */
1223     WORD32 i4_mb_x, i4_mb_y;
1224 
1225     /* strides */
1226     WORD32 i4_src_strd = ps_proc->i4_src_strd;
1227     WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1228     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1229 
1230     /* quant params */
1231     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1232 
1233     /* deblk ctxt */
1234     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1235 
1236     /* deblk bs context */
1237     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1238 
1239     /* Pointer to mv_buffer of current frame */
1240     mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1241 
1242     /* Pointers for color space conversion */
1243     UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1244 
1245     /* Pad the MB to support non standard sizes */
1246     UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1247     UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1248     UWORD16 u2_num_rows = MB_SIZE;
1249     WORD32 convert_uv_only;
1250 
1251     /********************************************************************/
1252     /*                            BEGIN INIT                            */
1253     /********************************************************************/
1254 
1255     i4_mb_x = ps_proc->i4_mb_x;
1256     i4_mb_y = ps_proc->i4_mb_y;
1257 
1258     /* Number of mbs processed in one loop of process function */
1259     ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1260     ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1261 
1262     /* init buffer pointers */
1263     convert_uv_only = 1;
1264     if (u4_pad_bottom_sz || u4_pad_right_sz ||
1265         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1266         ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1267         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1268     {
1269         if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1270             u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1271         ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1272         i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1273         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1274         convert_uv_only = 0;
1275     }
1276     else
1277     {
1278         i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1279         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1280     }
1281 
1282 
1283     if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1284         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1285         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1286         ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1287         u4_pad_bottom_sz || u4_pad_right_sz)
1288     {
1289         if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1290             (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1291             ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1292 
1293         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1294         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1295     }
1296     else
1297     {
1298         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1299         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1300     }
1301 
1302     ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1303     ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1304 
1305     /* Tempral back and forward reference buffer */
1306     ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1307     ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1308     ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1309     ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1310 
1311     /*
1312      * Do color space conversion
1313      * NOTE : We assume there that the number of MB's to process will not span multiple rows
1314      */
1315     switch (ps_codec->s_cfg.e_inp_color_fmt)
1316     {
1317         case IV_YUV_420SP_UV:
1318         case IV_YUV_420SP_VU:
1319             /* In case of 420 semi-planar input, copy last few rows to intermediate
1320                buffer as few SIMD functions access upto 16 more bytes.
1321                This data will be padded if required */
1322             if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1323             {
1324                 WORD32 num_rows = MB_SIZE;
1325                 UWORD8 *pu1_src;
1326                 UWORD8 *pu1_dst;
1327                 WORD32 i;
1328                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1329                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1330 
1331                 pu1_dst = ps_proc->pu1_src_buf_luma;
1332 
1333                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1334                     num_rows = MB_SIZE - u4_pad_bottom_sz;
1335                 for (i = 0; i < num_rows; i++)
1336                 {
1337                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1338                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1339                     pu1_dst += ps_proc->i4_src_strd;
1340                 }
1341                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1342                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1343                 pu1_dst = ps_proc->pu1_src_buf_chroma;
1344 
1345                 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1346                  * due to interleaved input
1347                  */
1348                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1349                     num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1350                 else
1351                     num_rows = BLK8x8SIZE;
1352                 for (i = 0; i < num_rows; i++)
1353                 {
1354                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1355                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1356                     pu1_dst += ps_proc->i4_src_chroma_strd;
1357                 }
1358 
1359             }
1360             break;
1361 
1362         case IV_YUV_420P :
1363             pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1364                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1365 
1366             pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1367                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1368 
1369             pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1370                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1371 
1372             ps_codec->pf_ih264e_conv_420p_to_420sp(
1373                             pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1374                             ps_proc->pu1_src_buf_luma,
1375                             ps_proc->pu1_src_buf_chroma, u2_num_rows,
1376                             ps_codec->s_cfg.u4_disp_wd,
1377                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1378                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1379                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1380                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1381                             convert_uv_only);
1382             break;
1383 
1384         case IV_YUV_422ILE :
1385             pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1386                               + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1387 
1388             ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1389                             ps_proc->pu1_src_buf_luma,
1390                             ps_proc->pu1_src_buf_chroma,
1391                             ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1392                             ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1393                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1394                             ps_proc->i4_src_chroma_strd,
1395                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1396             break;
1397 
1398         default:
1399             break;
1400     }
1401 
1402     if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1403     {
1404         UWORD32 u4_pad_wd, u4_pad_ht;
1405         u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1406         u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1407         u4_pad_ht = MB_SIZE;
1408         if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1409             u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1410 
1411         ih264_pad_right_luma(
1412                         ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1413                         ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1414 
1415         ih264_pad_right_chroma(
1416                         ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1417                         ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1418     }
1419 
1420     if (ps_proc->i4_mb_y && ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) {
1421         UWORD8 *pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] +
1422                         ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE) -
1423                         ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1424         UWORD8 *pu1_dst = ps_proc->pu1_src_buf_luma - ps_proc->i4_src_strd;
1425         memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1426         if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0)) {
1427             pu1_dst += ps_codec->s_cfg.u4_disp_wd;
1428             memset(pu1_dst, pu1_dst[-1], u4_pad_right_sz);
1429         }
1430     }
1431 
1432     /* pad bottom edge */
1433     if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1434     {
1435         ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1436                          ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1437 
1438         ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1439                          ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1440     }
1441 
1442 
1443     /* packed mb coeff data */
1444     ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1445 
1446     /* packed mb header data */
1447     ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1448 
1449     /* slice index */
1450     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1451 
1452     /*********************************************************************/
1453     /* ih264e_init_quant_params() routine is called at the pic init level*/
1454     /* this would have initialized the qp.                               */
1455     /* TODO_LATER: currently it is assumed that quant params donot change*/
1456     /* across mb's. When they do calculate update ps_qp_params accordingly*/
1457     /*********************************************************************/
1458 
1459     /* init mv buffer ptr */
1460     ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1461                      ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1462 
1463     /* Init co-located mv buffer */
1464     ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1465                         ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1466 
1467     if (i4_mb_y == 0)
1468     {
1469         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1470     }
1471     else
1472     {
1473         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1474                                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1475     }
1476 
1477     ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1478 
1479     /* mb type */
1480     ps_proc->u4_mb_type = I16x16;
1481 
1482     /* lambda */
1483     ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1484 
1485     /* mb distortion */
1486     ps_proc->i4_mb_distortion = SHRT_MAX;
1487 
1488     if (i4_mb_x == 0)
1489     {
1490         ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1491 
1492         ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1493 
1494         ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1495 
1496         if (i4_mb_y == 0)
1497         {
1498             memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1499         }
1500     }
1501 
1502     /* mb cost */
1503     ps_proc->i4_mb_cost = INT_MAX;
1504 
1505     /**********************/
1506     /* init deblk context */
1507     /**********************/
1508     ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1509     /* deblk lags the current mb proc by 1 row */
1510     /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1511     /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1512     /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1513     ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1514 
1515     /* buffer ptrs */
1516     ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1517     ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1518 
1519     /* init deblk bs context */
1520     /* mb indices */
1521     ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1522     ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1523 
1524     /* init n_mb_process  context */
1525     ps_n_mb_ctxt->i4_mb_x = 0;
1526     ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1527     ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1528 
1529     return IH264E_SUCCESS;
1530 }
1531 
1532 /**
1533 *******************************************************************************
1534 *
1535 * @brief This function performs luma & chroma padding
1536 *
1537 * @par Description:
1538 *
1539 * @param[in] ps_proc
1540 *  Process context corresponding to the job
1541 *
1542 * @param[in] pu1_curr_pic_luma
1543 *  Pointer to luma buffer
1544 *
1545 * @param[in] pu1_curr_pic_chroma
1546 *  Pointer to chroma buffer
1547 *
1548 * @param[in] i4_mb_x
1549 *  mb index x
1550 *
1551 * @param[in] i4_mb_y
1552 *  mb index y
1553 *
1554 *  @param[in] i4_pad_ht
1555 *  number of rows to be padded
1556 *
1557 * @returns  error status
1558 *
1559 * @remarks none
1560 *
1561 *******************************************************************************
1562 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1563 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1564                                        UWORD8 *pu1_curr_pic_luma,
1565                                        UWORD8 *pu1_curr_pic_chroma,
1566                                        WORD32 i4_mb_x,
1567                                        WORD32 i4_mb_y,
1568                                        WORD32 i4_pad_ht)
1569 {
1570     /* codec context */
1571     codec_t *ps_codec = ps_proc->ps_codec;
1572 
1573     /* strides */
1574     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1575 
1576     if (i4_mb_x == 0)
1577     {
1578         /* padding left luma */
1579         ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1580 
1581         /* padding left chroma */
1582         ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1583     }
1584     if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1585     {
1586         /* padding right luma */
1587         ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1588 
1589         /* padding right chroma */
1590         ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1591 
1592         if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1593         {
1594             UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1595             UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1596 
1597             /* padding bottom luma */
1598             ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1599 
1600             /* padding bottom chroma */
1601             ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1602         }
1603     }
1604 
1605     if (i4_mb_y == 0)
1606     {
1607         UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1608         UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1609         WORD32 wd = MB_SIZE;
1610 
1611         if (i4_mb_x == 0)
1612         {
1613             pu1_rec_luma -= PAD_LEFT;
1614             pu1_rec_chroma -= PAD_LEFT;
1615 
1616             wd += PAD_LEFT;
1617         }
1618         if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1619         {
1620             wd += PAD_RIGHT;
1621         }
1622 
1623         /* padding top luma */
1624         ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1625 
1626         /* padding top chroma */
1627         ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1628     }
1629 
1630     return IH264E_SUCCESS;
1631 }
1632 
1633 
1634 
1635 
1636 /**
1637 *******************************************************************************
1638 *
1639 * @brief This function performs deblocking and border padding of the recon
1640 *  picture for batches of 'n' MBs
     *  NOTE(review): the name mentions half-pel generation, but no half-pel
     *  work is visible in this function body - confirm where it happens.
1641 *
1642 * @par Description:
     *  The function is invoked with the position of the MB that was just
     *  processed and works in the following stages:
     *   1. If u4_disable_deblock_level is 1, the left / right border of the
     *      current MB is padded straight away when the MB is in the first /
     *      last column.
     *   2. Nothing further is done for MB row 0, unless the picture is a single
     *      MB row tall.
     *   3. Work is batched: the call returns without doing anything until
     *      i4_n_mbs MBs have accumulated since ps_n_mb_ctxt->i4_mb_x, or the
     *      last MB of the row has been reached.
     *   4. With deblocking enabled, the batch is deblocked at the position held
     *      in ps_proc->s_deblk_ctxt, provided the deblk map of the row above
     *      that position is already set for the batch columns (and the column
     *      to their right). After each MB the MB row above it is padded on the
     *      left / right at the picture edges. With deblocking disabled only
     *      the deblk cursor is advanced.
     *   5. While MB row 2 is processed, the top border of the picture is padded
     *      for the columns of the batch.
     *   6. At the last MB of the last row: the whole last row is deblocked
     *      (when deblocking is enabled), the last two MB rows are padded on
     *      the left / right, the top border is padded if the picture is at most
     *      2 MB rows tall, and the bottom border is padded.
1643 *
1644 * @param[in] ps_proc
1645 *  Process context corresponding to the job
1646 *
1647 * @param[in] pu1_curr_pic_luma
1648 *  Current MB being processed (Luma); only used for padding when
     *  u4_disable_deblock_level is 1
1649 *
1650 * @param[in] pu1_curr_pic_chroma
1651 *  Current MB being processed (Chroma); only used for padding when
     *  u4_disable_deblock_level is 1
1652 *
1653 * @param[in] i4_mb_x
1654 *  Column index of the current MB processed
1655 *
1656 * @param[in] i4_mb_y
1657 *  Row index of the current MB processed
1658 *
1659 * @returns  IH264E_SUCCESS on every path, including the early returns that
     *  skip the batch
1660 *
1661 * @remarks The early returns leave ps_n_mb_ctxt->i4_mb_x and the deblk cursor
     *  untouched.
1662 *
1663 *******************************************************************************
1664 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1665 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1666                                                      UWORD8 *pu1_curr_pic_luma,
1667                                                      UWORD8 *pu1_curr_pic_chroma,
1668                                                      WORD32 i4_mb_x,
1669                                                      WORD32 i4_mb_y)
1670 {
1671     /* codec context (source of the padding function pointers) */
1672     codec_t *ps_codec = ps_proc->ps_codec;
1673 
1674     /* n_mb processing context: i4_mb_x is the first column of the pending batch */
1675     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1676 
1677     /* deblk context: position and recon pointers of the next MB to deblock */
1678     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1679 
1680     /* recon buffer stride */
1681     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1682 
1683     /* loop variables ('row' below counts MBs inside the batch, not picture rows) */
1684     WORD32 row, i, j, col;
1685 
1686     /* width of the top padding done while row 2 is processed */
1687     UWORD32 u4_pad_wd;
1688 
1689     /* deblk_map of the row being deblocked.
          * Computed once from ps_deblk->i4_mb_y as it is on entry; it is not
          * refreshed when ps_deblk->i4_mb_y is reassigned further down. */
1690     UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1691 
1692     /* deblk_map of the row above the one being deblocked */
1693     UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1694 
          /* byte offset from the recon base at which top padding starts
           * (signed despite the u4_ prefix: it is set to -PAD_LEFT below) */
1695     WORD32 u4_pad_top = 0;
1696 
          /* 1 when every deblk map entry inspected in the previous row is set
           * (signed despite the u4_ prefix) */
1697     WORD32 u4_deblk_prev_row = 0;
1698 
1699     /* Number of mbs to be processed */
1700     WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1701 
1702     /* Number of mbs  actually processed
1703      * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1704     WORD32 i4_n_mb_process_count = 0;
1705 
1706     UWORD8 *pu1_pad_bottom_src = NULL;
1707 
1708     UWORD8 *pu1_pad_src_luma = NULL;
1709     UWORD8 *pu1_pad_src_chroma = NULL;
1710 
          /* Deblocking disabled: the current MB row can be padded left / right
           * immediately, working from the pointers passed in by the caller */
1711     if (ps_proc->u4_disable_deblock_level == 1)
1712     {
1713         /* If left most MB is processed, then pad left */
1714         if (i4_mb_x == 0)
1715         {
1716             /* padding left luma */
1717             ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1718 
1719             /* padding left chroma */
1720             ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1721         }
1722         /*last col*/
1723         if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1724         {
1725             /* padding right luma */
1726             ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1727 
1728             /* padding right chroma */
1729             ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1730         }
1731     }
1732 
          /* Row 0 is skipped here unless it is also the last row (picture one MB row tall) */
1733     if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1734     {
1735         /* if number of mb's to be processed are less than 'N', go back.
1736          * exception to the above clause is end of row.
              * (i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) is the number of MBs from the
              * start of the pending batch up to and including the current MB. */
1737         if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1738         {
1739             return IH264E_SUCCESS;
1740         }
1741         else
1742         {
                  /* batch size, capped at i4_n_mbs */
1743             i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1744 
1745             /* performing deblocking for required number of MBs */
1746             if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1747             {
1748                 u4_deblk_prev_row = 1;
1749 
1750                 /* checking whether the top rows are deblocked */
1751                 for (col = 0; col < i4_n_mb_process_count; col++)
1752                 {
1753                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1754                 }
1755 
1756                 /* checking whether the top right MB is deblocked
                      * (skipped when the batch ends at the last column) */
1757                 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1758                 {
1759                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1760                 }
1761 
1762                 /* Top or Top right MBs not deblocked: give up on this batch for now.
                      * The (i4_mb_y > 0) term is already guaranteed by the enclosing if. */
1763                 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1764                 {
1765                     return IH264E_SUCCESS;
1766                 }
1767 
                      /* deblock the batch one MB at a time, moving the deblk cursor to the right */
1768                 for (row = 0; row < i4_n_mb_process_count; row++)
1769                 {
1770                     ih264e_deblock_mb(ps_proc, ps_deblk);
1771 
                          /* mark this MB as deblocked in the deblk map */
1772                     pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1773 
                          /* The padding below targets the MB row above the one just
                           * deblocked (pointers are moved up by one MB row), so it
                           * needs a row above to exist */
1774                     if (ps_deblk->i4_mb_y > 0)
1775                     {
1776                         if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1777                         {
1778                             /* padding left luma */
1779                             ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1780 
1781                             /* padding left chroma */
1782                             ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1783                         }
1784 
1785                         if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1786                         {
1787                             /* padding right luma */
1788                             ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1789 
1790                             /* padding right chroma */
1791                             ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1792                         }
1793                     }
1794                     ps_deblk->i4_mb_x++;
1795 
1796                     ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1797                     ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1798 
1799                 }
1800             }
                  /* deblocking disabled: only advance the deblk cursor past the batch */
1801             else if(i4_mb_y > 0)
1802             {
1803                 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1804 
1805                 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1806                 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1807             }
1808 
                  /* Top border of the picture is padded batch by batch while MB row 2
                   * is processed; pictures of at most 2 MB rows are handled in the
                   * last-row branch instead.
                   * NOTE(review): presumably row 2 is chosen so that row 0 is fully
                   * deblocked by then - confirm. */
1809             if (i4_mb_y == 2)
1810             {
1811                 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1812                 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1813 
                      /* first batch of the row: include the left border */
1814                 if (ps_n_mb_ctxt->i4_mb_x == 0)
1815                 {
1816                     u4_pad_wd += PAD_LEFT;
1817                     u4_pad_top = -PAD_LEFT;
1818                 }
1819 
                      /* last batch of the row: include the right border */
1820                 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1821                 {
1822                     u4_pad_wd += PAD_RIGHT;
1823                 }
1824 
1825                 /* padding top luma */
1826                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1827 
1828                 /* padding top chroma */
1829                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1830             }
1831 
                  /* the batch is done: move the batch start past it */
1832             ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1833 
                  /* last MB of a row */
1834             if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1835             {
                      /* last MB of the picture: finish the last row and the remaining borders */
1836                 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1837                 {
1838                     /* Bottom Padding is done in one stretch for the entire width */
1839                     if (ps_proc->u4_disable_deblock_level != 1)
1840                     {
                              /* rewind the deblk cursor to the first MB of the last row */
1841                         ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1842 
1843                         ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1844 
1845                         ps_n_mb_ctxt->i4_mb_x = 0;
1846                         ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1847                         ps_deblk->i4_mb_x = 0;
1848                         ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1849 
1850                         /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1851                         ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1852 
                              /* the last row is walked as j full batches of i4_n_mbs MBs
                               * followed by the remaining (i4_wd_mbs % i4_n_mbs) MBs */
1853                         i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1854 
1855                         j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1856 
1857                         for (i = 0; i < j; i++)
1858                         {
1859                             for (col = 0; col < i4_n_mbs; col++)
1860                             {
1861                                 ih264e_deblock_mb(ps_proc, ps_deblk);
1862 
                                      /* NOTE(review): pu1_deblk_map was derived from the value
                                       * ps_deblk->i4_mb_y had on entry, so this does not index
                                       * the row ps_deblk->i4_mb_y was just set to - confirm
                                       * this is intended. */
1863                                 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1864 
1865                                 ps_deblk->i4_mb_x++;
1866                                 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1867                                 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1868                                 ps_n_mb_ctxt->i4_mb_x++;
1869                             }
1870                         }
1871 
                              /* remaining MBs of the last row (fewer than i4_n_mbs) */
1872                         for (col = 0; col < i4_n_mb_process_count; col++)
1873                         {
1874                             ih264e_deblock_mb(ps_proc, ps_deblk);
1875 
                                  /* same stale-row remark as in the loop above applies */
1876                             pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1877 
1878                             ps_deblk->i4_mb_x++;
1879                             ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1880                             ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1881                             ps_n_mb_ctxt->i4_mb_x++;
1882                         }
1883 
                              /* Left padding of the last two MB rows, starting with row
                               * (i4_ht_mbs - 2).
                               * NOTE(review): for a picture one MB row tall this offset is
                               * negative, i.e. one MB row above the recon base - confirm
                               * the buffer layout allows it. */
1884                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1885 
1886                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1887 
1888                         /* padding left luma */
1889                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1890 
1891                         /* padding left chroma */
1892                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1893 
                              /* move down to the last MB row */
1894                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1895                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1896 
1897                         /* padding left luma */
1898                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1899 
1900                         /* padding left chroma */
1901                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1902 
                              /* Right padding of the same two MB rows: start just past the
                               * last MB column of row (i4_ht_mbs - 2) */
1903                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1904 
1905                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1906 
1907                         /* padding right luma */
1908                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1909 
1910                         /* padding right chroma */
1911                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1912 
                              /* move down to the last MB row */
1913                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1914                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1915 
1916                         /* padding right luma */
1917                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1918 
1919                         /* padding right chroma */
1920                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1921 
1922                     }
1923 
1924                     /* In case height is at most 2 MBs, pad top here: the (i4_mb_y == 2)
                          * branch above is never taken for such pictures. The padding spans
                          * i4_rec_strd bytes starting PAD_LEFT before the recon base. */
1925                     if (ps_proc->i4_ht_mbs <= 2)
1926                     {
1927                         UWORD8 *pu1_pad_top_src;
1928                         /* padding top luma */
1929                         pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1930                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1931 
1932                         /* padding top chroma */
1933                         pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1934                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1935                     }
1936 
1937                     /* padding bottom luma: starts PAD_LEFT before the first row below
                          * the picture and spans i4_rec_strd bytes */
1938                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1939                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1940 
1941                     /* padding bottom chroma */
1942                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1943                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1944                 }
1945             }
1946         }
1947     }
1948 
1949     return IH264E_SUCCESS;
1950 }
1951 
1952 
1953 /**
1954 *******************************************************************************
1955 *
1956 * @brief This function performs luma & chroma core coding for a set of mb's.
1957 *
1958 * @par Description:
1959 *  The mb to be coded is taken and is evaluated over a predefined set of modes
1960 *  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1961 *  is selected and using intra/inter prediction filters, prediction is carried out.
1962 *  The deviation between src and pred signal constitutes error signal. This error
1963 *  signal is transformed (hierarchical transform if necessary) and quantized. The
1964 *  quantized residue is packed in to entropy buffer for entropy coding. This is
1965 *  repeated for all the mb's enlisted under the job.
1966 *
1967 * @param[in] ps_proc
1968 *  Process context corresponding to the job
1969 *
1970 * @returns  error status
1971 *
1972 * @remarks none
1973 *
1974 *******************************************************************************
1975 */
ih264e_process(process_ctxt_t * ps_proc)1976 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1977 {
1978     /* error status */
1979     WORD32 error_status = IH264_SUCCESS;
1980 
1981     /* codec context */
1982     codec_t *ps_codec = ps_proc->ps_codec;
1983 
1984     /* cbp luma, chroma */
1985     UWORD32 u4_cbp_l, u4_cbp_c;
1986 
1987     /* width in mbs */
1988     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1989 
1990     /* loop var */
1991     WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1992 
1993     /* valid modes */
1994     UWORD32 u4_valid_modes = 0;
1995 
1996     /* gate threshold */
1997     WORD32 i4_gate_threshold = 0;
1998 
1999     /* is intra */
2000     WORD32 luma_idx, chroma_idx, is_intra;
2001 
2002     /* temp variables */
2003     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2004 
2005     /*
2006      * list of modes for evaluation
2007      * -------------------------------------------------------------------------
2008      * Note on enabling I4x4 and I16x16
2009      * At very low QP's the hadamard transform in I16x16 will push up the maximum
2010      * coeff value very high. CAVLC may not be able to represent the value and
2011      * hence the stream may not be decodable in some clips.
2012      * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
2013      */
2014     if (ps_proc->i4_slice_type == ISLICE)
2015     {
2016         if (ps_proc->u4_frame_qp > 10)
2017         {
2018             /* enable intra 16x16 */
2019             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2020 
2021             /* enable intra 8x8 */
2022             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
2023         }
2024 
2025         /* enable intra 4x4 */
2026         u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2027         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2028 
2029     }
2030     else if (ps_proc->i4_slice_type == PSLICE)
2031     {
2032         if (ps_proc->u4_frame_qp > 10)
2033         {
2034             /* enable intra 16x16 */
2035             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2036         }
2037 
2038         /* enable intra 4x4 */
2039         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2040         {
2041             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2042         }
2043         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2044 
2045         /* enable inter P16x16 */
2046         u4_valid_modes |= (1 << P16x16);
2047     }
2048     else if (ps_proc->i4_slice_type == BSLICE)
2049     {
2050         if (ps_proc->u4_frame_qp > 10)
2051         {
2052             /* enable intra 16x16 */
2053             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2054         }
2055 
2056         /* enable intra 4x4 */
2057         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2058         {
2059             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2060         }
2061         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2062 
2063         /* enable inter B16x16 */
2064         u4_valid_modes |= (1 << B16x16);
2065     }
2066 
2067 
2068     /* init entropy */
2069     ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
2070     ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
2071     ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
2072 
2073     /* compute recon when :
2074      *   1. current frame is to be used as a reference
2075      *   2. dump recon for bit stream sanity check
2076      */
2077     ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
2078                                 ps_codec->s_cfg.u4_enable_recon ||
2079                                 ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR;
2080 
2081     /* Encode 'n' macroblocks,
2082      * 'n' being the number of mbs dictated by current proc ctxt */
2083     for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
2084     {
2085         /* since we have not yet found sad, we have not yet got min sad */
2086         /* we need to initialize these variables for each MB */
2087         /* TODO how to get the min sad into the codec */
2088         ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2089         ps_proc->u4_min_sad_reached = 0;
2090 
2091         /* mb analysis */
2092         {
2093             /* temp var */
2094             WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2095 
2096             /* force intra refresh ? */
2097             WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2098                             (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2099 
2100             /* evaluate inter 16x16 modes */
2101             if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2102             {
2103                 /* compute nmb me */
2104                 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2105                 {
2106                     ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2107                                                        i4_wd_mbs - ps_proc->i4_mb_x));
2108                 }
2109 
2110                 /* set pointers to ME data appropriately for other modules to use */
2111                 {
2112                     UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2113 
2114                     /* get the min sad condition for current mb */
2115                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2116                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2117 
2118                     ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2119                     ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2120                     ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2121 
2122                     ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2123                     ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2124                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2125                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2126                     ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2127 
2128                     /* get the best sub pel buffer */
2129                     ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2130                     ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2131                 }
2132                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2133             }
2134             else
2135             {
2136                 /* Derive neighbor availability for the current macroblock */
2137                 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2138 
2139                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2140             }
2141 
2142             /*
2143              * If air says intra, we need to force the following code path to evaluate intra
2144              * The easy way is just to say that the inter cost is too much
2145              */
2146             if (!i4_air_enable_inter)
2147             {
2148                 ps_proc->u4_min_sad_reached = 0;
2149                 ps_proc->i4_mb_cost = INT_MAX;
2150                 ps_proc->i4_mb_distortion = INT_MAX;
2151             }
2152             else if (ps_proc->u4_mb_type == PSKIP)
2153             {
2154                 goto UPDATE_MB_INFO;
2155             }
2156 
2157             /* wait until the proc of [top + 1] mb is computed.
2158              * We wait till the proc dependencies are satisfied */
2159              if(ps_proc->i4_mb_y > 0)
2160              {
2161                 /* proc map */
2162                 UWORD8  *pu1_proc_map_top;
2163 
2164                 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2165 
2166                 while (1)
2167                 {
2168                     volatile UWORD8 *pu1_buf;
2169                     WORD32 idx = i4_mb_idx + 1;
2170 
2171                     idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2172                     pu1_buf =  pu1_proc_map_top + idx;
2173                     if(*pu1_buf)
2174                         break;
2175                     ithread_yield();
2176                 }
2177             }
2178 
2179             /* If we already have the minimum sad, there is no point in searching for sad again */
2180             if (ps_proc->u4_min_sad_reached == 0 || ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST)
2181             {
2182                 /* intra gating in inter slices */
2183                 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2184                 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2185                 {
2186                     /* distortion of neighboring blocks */
2187                     WORD32 i4_distortion[4];
2188 
2189                     i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2190 
2191                     i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2192 
2193                     i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2194 
2195                     i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2196 
2197                     i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2198 
2199                 }
2200 
2201 
2202                 /* If we are going to force intra we need to evaluate intra irrespective of gating */
2203                 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2204                 {
2205                     /* evaluate intra 4x4 modes */
2206                     if (u4_valid_modes & (1 << I4x4))
2207                     {
2208                         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2209                         {
2210                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2211                         }
2212                         else
2213                         {
2214                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2215                         }
2216                     }
2217 
2218                     /* evaluate intra 16x16 modes */
2219                     if (u4_valid_modes & (1 << I16x16))
2220                     {
2221                         ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2222                     }
2223 
2224                     /* evaluate intra 8x8 modes */
2225                     if (u4_valid_modes & (1 << I8x8))
2226                     {
2227                         ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2228                     }
2229 
2230                 }
2231         }
2232      }
2233 
2234         /* is intra */
2235         if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2236         {
2237             luma_idx = ps_proc->u4_mb_type;
2238             chroma_idx = 0;
2239             is_intra = 1;
2240 
2241             /* evaluate chroma blocks for intra */
2242             ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2243         }
2244         else
2245         {
2246             luma_idx = 3;
2247             chroma_idx = 1;
2248             is_intra = 0;
2249         }
2250         ps_proc->u4_is_intra = is_intra;
2251         ps_proc->ps_pu->b1_intra_flag = is_intra;
2252 
2253         /* redo MV pred of neighbors in the case intra mb */
2254         /* TODO : currently called unconditionally, needs to be called only in the case of intra
2255          * to modify neighbors */
2256         if (ps_proc->i4_slice_type != ISLICE)
2257         {
2258             ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2259         }
2260 
2261         /* Perform luma mb core coding */
2262         u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2263 
2264         /* Perform chroma mb core coding */
2265         u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2266 
2267         /* coded block pattern */
2268         ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2269 
2270         if (!ps_proc->u4_is_intra)
2271         {
2272             if (ps_proc->i4_slice_type == BSLICE)
2273             {
2274                 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2275                 {
2276                     ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2277                 }
2278             }
2279             else if(!ps_proc->u4_cbp)
2280             {
2281                 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2282                 {
2283                     ps_proc->u4_mb_type = PSKIP;
2284                 }
2285             }
2286         }
2287 
2288 UPDATE_MB_INFO:
2289 
2290         /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2291         ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2292 
2293         /**********************************************************************/
2294         /* if disable deblock level is '0' this implies enable deblocking for */
2295         /* all edges of all macroblocks with out any restrictions             */
2296         /*                                                                    */
2297         /* if disable deblock level is '1' this implies disable deblocking for*/
2298         /* all edges of all macroblocks with out any restrictions             */
2299         /*                                                                    */
2300         /* if disable deblock level is '2' this implies enable deblocking for */
2301         /* all edges of all macroblocks except edges overlapping with slice   */
2302         /* boundaries. This option is not currently supported by the encoder  */
2303         /* hence the slice map should be of no significance to perform debloc */
2304         /* king                                                               */
2305         /**********************************************************************/
2306 
2307         if (ps_proc->u4_compute_recon)
2308         {
2309             /* deblk context */
2310             /* src pointers */
2311             UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2312             UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2313 
2314             /* src indices */
2315             UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2316             UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2317 
2318             /* compute blocking strength */
2319             if (ps_proc->u4_disable_deblock_level != 1)
2320             {
2321                 ih264e_compute_bs(ps_proc);
2322             }
2323 
2324             /* nmb deblocking and hpel and padding */
2325             ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2326                                                   pu1_cur_pic_chroma, i4_mb_x,
2327                                                   i4_mb_y);
2328         }
2329 
2330         /* update the context after for coding next mb */
2331         error_status = ih264e_update_proc_ctxt(ps_proc);
2332         if(error_status != IH264E_SUCCESS)
2333         {
2334             return error_status;
2335         }
2336         /* Once the last row is processed, mark the buffer status appropriately */
2337         if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2338         {
2339             /* Pointer to current picture buffer structure */
2340             pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2341 
2342             /* Pointer to current picture's mv buffer structure */
2343             mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2344 
2345             /**********************************************************************/
2346             /* if disable deblock level is '0' this implies enable deblocking for */
2347             /* all edges of all macroblocks with out any restrictions             */
2348             /*                                                                    */
2349             /* if disable deblock level is '1' this implies disable deblocking for*/
2350             /* all edges of all macroblocks with out any restrictions             */
2351             /*                                                                    */
2352             /* if disable deblock level is '2' this implies enable deblocking for */
2353             /* all edges of all macroblocks except edges overlapping with slice   */
2354             /* boundaries. This option is not currently supported by the encoder  */
2355             /* hence the slice map should be of no significance to perform debloc */
2356             /* king                                                               */
2357             /**********************************************************************/
2358             error_status = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
2359                                                 ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2360             if(error_status != IH264E_SUCCESS)
2361             {
2362                 return error_status;
2363             }
2364             error_status = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
2365                                                 ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2366             if(error_status != IH264E_SUCCESS)
2367             {
2368                 return error_status;
2369             }
2370             if (ps_codec->s_cfg.u4_enable_recon)
2371             {
2372                 /* pic cnt */
2373                 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2374 
2375                 /* rec buffers */
2376                 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2377 
2378                 /* is last? */
2379                 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2380 
2381                 /* frame time stamp */
2382                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2383                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2384             }
2385 
2386         }
2387     }
2388 
2389     DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2390 
2391     return error_status;
2392 }
2393 
2394 /**
2395 *******************************************************************************
2396 *
2397 * @brief
2398 *  Function to update rc context after encoding
2399 *
2400 * @par   Description
2401 *  This function updates the rate control context after the frame is encoded.
2402 *  Number of bits consumed by the current frame, frame distortion, frame cost,
2403 *  number of intra/inter mb's, ... are passed on to rate control context for
2404 *  updating the rc model.
2405 *
2406 * @param[in] ps_codec
2407 *  Handle to codec context
2408 *
2409 * @param[in] ctxt_sel
2410 *  frame context selector
2411 *
2412 * @param[in] pic_cnt
2413 *  pic count
2414 *
2415 * @returns i4_stuffing_byte
2416 *  number of stuffing bytes (if necessary)
2417 *
2418 * @remarks
2419 *
2420 *******************************************************************************
2421 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2422 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2423 {
2424     /* proc set base idx */
2425     WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2426 
2427     /* proc ctxt */
2428     process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2429 
2430     /* frame qp */
2431     UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2432 
2433     /* cbr rc return status */
2434     WORD32 i4_stuffing_byte = 0;
2435 
2436     /* current frame stats */
2437     frame_info_t s_frame_info;
2438     picture_type_e rc_pic_type;
2439 
2440     /* temp var */
2441     WORD32 i, j;
2442 
2443     /********************************************************************/
2444     /*                            BEGIN INIT                            */
2445     /********************************************************************/
2446 
2447     /* init frame info */
2448     irc_init_frame_info(&s_frame_info);
2449 
2450     /* get frame info */
2451     for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2452     {
2453         /*****************************************************************/
2454         /* One frame can be encoded by max of u4_num_cores threads       */
2455         /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2456         /* u4_num_cores threads                                          */
2457         /*****************************************************************/
2458         for (j = 0; j< MAX_MB_TYPE; j++)
2459         {
2460             s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2461 
2462             s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2463 
2464             s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2465         }
2466 
2467         s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2468 
2469         s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2470 
2471         /*****************************************************************/
2472         /* gather number of residue and header bits consumed by the frame*/
2473         /*****************************************************************/
2474         ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2475     }
2476 
2477     /* get pic type */
2478     switch (ps_codec->pic_type)
2479     {
2480         case PIC_I:
2481         case PIC_IDR:
2482             rc_pic_type = I_PIC;
2483             break;
2484         case PIC_P:
2485             rc_pic_type = P_PIC;
2486             break;
2487         case PIC_B:
2488             rc_pic_type = B_PIC;
2489             break;
2490         default:
2491             assert(0);
2492             break;
2493     }
2494 
2495     /* update rc lib with current frame stats */
2496     i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2497                                           &(s_frame_info),
2498                                           ps_codec->s_rate_control.pps_pd_frm_rate,
2499                                           ps_codec->s_rate_control.pps_time_stamp,
2500                                           ps_codec->s_rate_control.pps_frame_time,
2501                                           (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2502                                           &rc_pic_type,
2503                                           i4_is_first_frm,
2504                                           &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2505                                           u1_frame_qp,
2506                                           &ps_codec->s_rate_control.num_intra_in_prev_frame,
2507                                           &ps_codec->s_rate_control.i4_avg_activity);
2508     return i4_stuffing_byte;
2509 }
2510 
2511 /**
2512 *******************************************************************************
2513 *
2514 * @brief
2515 *  entry point of a spawned encoder thread
2516 *
2517 * @par Description:
2518 *  The encoder thread dequeues a proc/entropy job from the encoder queue and
2519 *  calls necessary routines.
2520 *
2521 * @param[in] pv_proc
2522 *  Process context corresponding to the thread
2523 *
2524 * @returns  error status
2525 *
2526 * @remarks
2527 *
2528 *******************************************************************************
2529 */
ih264e_process_thread(void * pv_proc)2530 WORD32 ih264e_process_thread(void *pv_proc)
2531 {
2532     /* error status */
2533     IH264_ERROR_T ret = IH264_SUCCESS;
2534     WORD32 error_status = IH264_SUCCESS;
2535 
2536     /* proc ctxt */
2537     process_ctxt_t *ps_proc = pv_proc;
2538 
2539     /* codec ctxt */
2540     codec_t *ps_codec = ps_proc->ps_codec;
2541 
2542     /* structure to represent a processing job entry */
2543     job_t s_job;
2544 
2545     /* blocking call : entropy dequeue is non-blocking till all
2546      * the proc jobs are processed */
2547     WORD32 is_blocking = 0;
2548 
2549     /* set affinity */
2550     ithread_set_affinity(ps_proc->i4_id);
2551 
2552     ps_proc->i4_error_code = IH264_SUCCESS;
2553     while(1)
2554     {
2555         /* dequeue a job from the entropy queue */
2556         {
2557             int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2558 
2559             /* codec context selector */
2560             WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2561 
2562             volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2563 
2564             /* have the lock */
2565             if (error == 0)
2566             {
2567                 if (*pu4_buf == 0)
2568                 {
2569                     /* no entropy threads are active, try dequeuing a job from the entropy queue */
2570                     ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2571                     if (IH264_SUCCESS == ret)
2572                     {
2573                         *pu4_buf = 1;
2574                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2575                         goto WORKER;
2576                     }
2577                     else if(is_blocking)
2578                     {
2579                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2580                         break;
2581                     }
2582                 }
2583                 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2584             }
2585         }
2586 
2587         /* dequeue a job from the process queue */
2588         ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2589         if (IH264_SUCCESS != ret)
2590         {
2591             if(ps_proc->i4_id)
2592                 break;
2593             else
2594             {
2595                 is_blocking = 1;
2596                 continue;
2597             }
2598         }
2599 
2600 WORKER:
2601         /* choose appropriate proc context based on proc_base_idx */
2602         ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2603 
2604         switch (s_job.i4_cmd)
2605         {
2606             case CMD_PROCESS:
2607                 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2608                 ps_proc->i4_mb_x = s_job.i2_mb_x;
2609                 ps_proc->i4_mb_y = s_job.i2_mb_y;
2610 
2611                 /* init process context */
2612                 ih264e_init_proc_ctxt(ps_proc);
2613 
2614                 /* core code all mbs enlisted under the current job */
2615                 error_status = ih264e_process(ps_proc);
2616                 if(error_status !=IH264_SUCCESS)
2617                 {
2618                     ps_proc->i4_error_code = error_status;
2619                     return ret;
2620                 }
2621                 break;
2622 
2623             case CMD_ENTROPY:
2624                 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2625                 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2626                 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2627 
2628                 /* init entropy */
2629                 ih264e_init_entropy_ctxt(ps_proc);
2630 
2631                 /* entropy code all mbs enlisted under the current job */
2632                 error_status = ih264e_entropy(ps_proc);
2633                 if(error_status !=IH264_SUCCESS)
2634                 {
2635                     ps_proc->i4_error_code = error_status;
2636                     return ret;
2637                 }
2638                 break;
2639 
2640             default:
2641                 ps_proc->i4_error_code = IH264_FAIL;
2642                 return ret;
2643         }
2644     }
2645 
2646     return ret;
2647 }
2648