• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_process.c
25 *
26 * @brief
27 *  Contains functions for codec thread
28 *
29 * @author
30 *  Harish
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps()
34 * - ih264e_init_entropy_ctxt()
35 * - ih264e_entropy()
36 * - ih264e_pack_header_data()
37 * - ih264e_update_proc_ctxt()
38 * - ih264e_init_proc_ctxt()
39 * - ih264e_pad_recon_buffer()
40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
41 * - ih264e_process()
42 * - ih264e_set_rc_pic_params()
43 * - ih264e_update_rc_post_enc()
44 * - ih264e_process_thread()
45 *
46 * @remarks
47 *  None
48 *
49 *******************************************************************************
50 */
51 
52 /*****************************************************************************/
53 /* File Includes                                                             */
54 /*****************************************************************************/
55 
56 /* System include files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <limits.h>
62 #include <assert.h>
63 
64 /* User include files */
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ih264_defs.h"
69 #include "ih264_debug.h"
70 #include "ime_distortion_metrics.h"
71 #include "ime_defs.h"
72 #include "ime_structs.h"
73 #include "ih264_error.h"
74 #include "ih264_structs.h"
75 #include "ih264_trans_quant_itrans_iquant.h"
76 #include "ih264_inter_pred_filters.h"
77 #include "ih264_mem_fns.h"
78 #include "ih264_padding.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_cabac_tables.h"
82 #include "ih264_platform_macros.h"
83 #include "ih264_macros.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264e_error.h"
86 #include "ih264e_bitstream.h"
87 #include "ih264_common_tables.h"
88 #include "ih264_list.h"
89 #include "ih264e_defs.h"
90 #include "irc_cntrl_param.h"
91 #include "irc_frame_info_collector.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_cabac.h"
96 #include "ih264e_process.h"
97 #include "ithread.h"
98 #include "ih264e_intra_modes_eval.h"
99 #include "ih264e_encode_header.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_config.h"
102 #include "ih264e_trace.h"
103 #include "ih264e_statistics.h"
104 #include "ih264_cavlc_tables.h"
105 #include "ih264e_cavlc.h"
106 #include "ih264e_deblk.h"
107 #include "ih264e_me.h"
108 #include "ih264e_debug.h"
109 #include "ih264e_master.h"
110 #include "ih264e_utils.h"
111 #include "irc_mem_req_and_acq.h"
112 #include "irc_rate_control_api.h"
113 #include "ih264e_platform_macros.h"
114 #include "ime_statistics.h"
115 
116 
117 /*****************************************************************************/
118 /* Function Definitions                                                      */
119 /*****************************************************************************/
120 
121 /**
122 ******************************************************************************
123 *
124 *  @brief This function generates sps, pps set on request
125 *
126 *  @par   Description
127 *  When the encoder is set in header generation mode, the following function
128 *  is called. This generates sps and pps headers and returns the control back
129 *  to caller.
130 *
131 *  @param[in]    ps_codec
132 *  pointer to codec context
133 *
134 *  @return      success or failure error code
135 *
136 ******************************************************************************
137 */
ih264e_generate_sps_pps(codec_t * ps_codec)138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139 {
140     /* choose between ping-pong process buffer set */
141     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142 
143     /* entropy ctxt */
144     entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145 
146     /* Bitstream structure */
147     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148 
149     /* sps */
150     sps_t *ps_sps = NULL;
151 
152     /* pps */
153     pps_t *ps_pps = NULL;
154 
155     /* output buff */
156     out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157 
158 
159     /********************************************************************/
160     /*      initialize the bit stream buffer                            */
161     /********************************************************************/
162     ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163 
164     /********************************************************************/
165     /*                    BEGIN HEADER GENERATION                       */
166     /********************************************************************/
167     /*ps_codec->i4_pps_id ++;*/
168     ps_codec->i4_pps_id %= MAX_PPS_CNT;
169 
170     /*ps_codec->i4_sps_id ++;*/
171     ps_codec->i4_sps_id %= MAX_SPS_CNT;
172 
173     /* populate sps header */
174     ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175     ih264e_populate_sps(ps_codec, ps_sps);
176 
177     /* populate pps header */
178     ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179     ih264e_populate_pps(ps_codec, ps_pps);
180 
181     ps_entropy->i4_error_code = IH264E_SUCCESS;
182 
183     /* generate sps */
184     ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
185                                                      &ps_codec->s_cfg.s_vui);
186 
187     /* generate pps */
188     ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
189 
190     /* queue output buffer */
191     ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
192 
193     return ps_entropy->i4_error_code;
194 }
195 
196 /**
197 *******************************************************************************
198 *
199 * @brief   initialize entropy context.
200 *
201 * @par Description:
202 *  Before invoking the call to perform to entropy coding the entropy context
203 *  associated with the job needs to be initialized. This involves the start
204 *  mb address, end mb address, slice index and the pointer to location at
205 *  which the mb residue info and mb header info are packed.
206 *
207 * @param[in] ps_proc
208 *  Pointer to the current process context
209 *
210 * @returns error status
211 *
212 * @remarks none
213 *
214 *******************************************************************************
215 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)216 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
217 {
218     /* codec context */
219     codec_t *ps_codec = ps_proc->ps_codec;
220 
221     /* entropy ctxt */
222     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
223 
224     /* start address */
225     ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
226 
227     /* end address */
228     ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
229 
230     /* slice index */
231     ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
232 
233     /* sof */
234     /* @ start of frame or start of a new slice, set sof flag */
235     if (ps_entropy->i4_mb_start_add == 0)
236     {
237         ps_entropy->i4_sof = 1;
238     }
239 
240     if (ps_entropy->i4_mb_x == 0)
241     {
242         /* packed mb coeff data */
243         ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
244                         ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
245 
246         /* packed mb header data */
247         ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
248                         ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
249     }
250 
251     return IH264E_SUCCESS;
252 }
253 
254 /**
255 *******************************************************************************
256 *
257 * @brief entry point for entropy coding
258 *
259 * @par Description
260 *  This function calls lower level functions to perform entropy coding for a
261 *  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
262 *  back the control, updates the ctxt and calls lower level functions again.
263 *  This process is repeated till all the rows or group of mb's (which ever is
264 *  minimum) are coded
265 *
266 * @param[in] ps_proc
267 *  process context
268 *
269 * @returns  error status
270 *
271 * @remarks
272 *
273 *******************************************************************************
274 */
275 
ih264e_entropy(process_ctxt_t * ps_proc)276 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
277 {
278     /* codec context */
279     codec_t *ps_codec = ps_proc->ps_codec;
280 
281     /* entropy context */
282     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
283 
284     /* cabac context */
285     cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
286 
287     /* sps */
288     sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
289 
290     /* pps */
291     pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
292 
293     /* slice header */
294     slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
295 
296     /* slice type */
297     WORD32 i4_slice_type = ps_proc->i4_slice_type;
298 
299     /* Bitstream structure */
300     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
301 
302     /* output buff */
303     out_buf_t s_out_buf;
304 
305     /* proc map */
306     UWORD8  *pu1_proc_map;
307 
308     /* entropy map */
309     UWORD8  *pu1_entropy_map_curr;
310 
311     /* proc base idx */
312     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
313 
314     /* temp var */
315     WORD32 i4_wd_mbs, i4_ht_mbs;
316     UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
317     WORD32 bitstream_start_offset, bitstream_end_offset;
318     /********************************************************************/
319     /*                            BEGIN INIT                            */
320     /********************************************************************/
321 
322     /* entropy encode start address */
323     u4_mb_idx = ps_entropy->i4_mb_start_add;
324 
325     /* entropy encode end address */
326     u4_mb_end_idx = ps_entropy->i4_mb_end_add;
327 
328     /* width in mbs */
329     i4_wd_mbs = ps_entropy->i4_wd_mbs;
330 
331     /* height in mbs */
332     i4_ht_mbs = ps_entropy->i4_ht_mbs;
333 
334     /* total mb cnt */
335     u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
336 
337     /* proc map */
338     pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
339 
340     /* entropy map */
341     pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
342 
343     /********************************************************************/
344     /* @ start of frame / slice,                                        */
345     /*      initialize the output buffer,                               */
346     /*      initialize the bit stream buffer,                           */
347     /*      check if sps and pps headers have to be generated,          */
348     /*      populate and generate slice header                          */
349     /********************************************************************/
350     if (ps_entropy->i4_sof)
351     {
352         /********************************************************************/
353         /*      initialize the output buffer                                */
354         /********************************************************************/
355         s_out_buf = ps_codec->as_out_buf[ctxt_sel];
356 
357         /* is last frame to encode */
358         s_out_buf.u4_is_last = ps_entropy->u4_is_last;
359 
360         /* frame idx */
361         s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
362         s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
363 
364         /********************************************************************/
365         /*      initialize the bit stream buffer                            */
366         /********************************************************************/
367         ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
368 
369         /********************************************************************/
370         /*                    BEGIN HEADER GENERATION                       */
371         /********************************************************************/
372         if (1 == ps_entropy->i4_gen_header)
373         {
374             /* generate sps */
375             ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
376                                                              &ps_codec->s_cfg.s_vui);
377             /* generate pps */
378             ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
379 
380             /* reset i4_gen_header */
381             ps_entropy->i4_gen_header = 0;
382         }
383 
384         /* populate slice header */
385         ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
386 
387         /* generate slice header */
388         ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
389                                                                   ps_pps, ps_sps);
390 
391         /* once start of frame / slice is done, you can reset it */
392         /* it is the responsibility of the caller to set this flag */
393         ps_entropy->i4_sof = 0;
394 
395         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
396         {
397             BITSTREAM_BYTE_ALIGN(ps_bitstrm);
398             BITSTREAM_FLUSH(ps_bitstrm);
399             ih264e_init_cabac_ctxt(ps_entropy);
400         }
401     }
402 
403     /* begin entropy coding for the mb set */
404     while (u4_mb_idx < u4_mb_end_idx)
405     {
406         /* init ptrs/indices */
407         if (ps_entropy->i4_mb_x == i4_wd_mbs)
408         {
409             ps_entropy->i4_mb_y++;
410             ps_entropy->i4_mb_x = 0;
411 
412             /* packed mb coeff data */
413             ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
414                             ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
415 
416             /* packed mb header data */
417             ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
418                             ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
419 
420             /* proc map */
421             pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
422 
423             /* entropy map */
424             pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
425         }
426 
427         DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
428         ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
429         ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
430 
431         /* wait until the curr mb is core coded */
432         /* The wait for curr mb to be core coded is essential when entropy is launched
433          * as a separate job
434          */
435         while (1)
436         {
437             volatile UWORD8 *pu1_buf1;
438             WORD32 idx = ps_entropy->i4_mb_x;
439 
440             pu1_buf1 = pu1_proc_map + idx;
441             if (*pu1_buf1)
442                 break;
443             ithread_yield();
444         }
445 
446 
447         /* write mb layer */
448         ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
449         /* Starting bitstream offset for header in bits */
450         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
451 
452         /* set entropy map */
453         pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
454 
455         u4_mb_idx++;
456         ps_entropy->i4_mb_x++;
457         /* check for eof */
458         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
459         {
460             if (ps_entropy->i4_mb_x < i4_wd_mbs)
461             {
462                 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
463             }
464         }
465 
466         if (ps_entropy->i4_mb_x == i4_wd_mbs)
467         {
468             /* if slices are enabled */
469             if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
470             {
471                 /* current slice index */
472                 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
473 
474                 /* slice map */
475                 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
476 
477                 /* No need to open a slice at end of frame. The current slice can be closed at the time
478                  * of signaling eof flag.
479                  */
480                 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
481                                                 != pu1_slice_idx[u4_mb_idx]))
482                 {
483                     if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
484                     { /* mb skip run */
485                         if ((i4_slice_type != ISLICE)
486                                         && *ps_entropy->pi4_mb_skip_run)
487                         {
488                             if (*ps_entropy->pi4_mb_skip_run)
489                             {
490                             PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
491                                 *ps_entropy->pi4_mb_skip_run = 0;
492                             }
493                         }
494                         /* put rbsp trailing bits for the previous slice */
495                                  ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
496                     }
497                     else
498                     {
499                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
500                     }
501 
502                     /* update slice header pointer */
503                     i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
504                     ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
505                     ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
506 
507                     /* populate slice header */
508                     ps_entropy->i4_mb_start_add = u4_mb_idx;
509                     ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
510                                                  ps_sps);
511 
512                     /* generate slice header */
513                     ps_entropy->i4_error_code |= ih264e_generate_slice_header(
514                                     ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
515                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
516                     {
517                         BITSTREAM_BYTE_ALIGN(ps_bitstrm);
518                         BITSTREAM_FLUSH(ps_bitstrm);
519                         ih264e_init_cabac_ctxt(ps_entropy);
520                     }
521                 }
522                 else
523                 {
524                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
525                                     && u4_mb_idx != u4_mb_cnt)
526                     {
527                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
528                     }
529                 }
530             }
531             /* Dont execute any further instructions until store synchronization took place */
532             DATA_SYNC();
533         }
534 
535         /* Ending bitstream offset for header in bits */
536         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
537         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
538                         bitstream_end_offset - bitstream_start_offset;
539     }
540 
541     /* check for eof */
542     if (u4_mb_idx == u4_mb_cnt)
543     {
544         /* set end of frame flag */
545         ps_entropy->i4_eof = 1;
546     }
547     else
548     {
549         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
550                         && ps_codec->s_cfg.e_slice_mode
551                                         != IVE_SLICE_MODE_BLOCKS)
552         {
553             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
554         }
555     }
556 
557     if (ps_entropy->i4_eof)
558     {
559         if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
560         {
561             /* mb skip run */
562             if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
563             {
564                 if (*ps_entropy->pi4_mb_skip_run)
565                 {
566                     PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
567                                  ps_entropy->i4_error_code, "mb skip run");
568                     *ps_entropy->pi4_mb_skip_run = 0;
569                 }
570             }
571             /* put rbsp trailing bits */
572              ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
573         }
574         else
575         {
576             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
577         }
578 
579         /* update current frame stats to rc library */
580         {
581             /* number of bytes to stuff */
582             WORD32 i4_stuff_bytes;
583 
584             /* update */
585             i4_stuff_bytes = ih264e_update_rc_post_enc(
586                             ps_codec, ctxt_sel,
587                             (ps_proc->ps_codec->i4_poc == 0));
588 
589             /* cbr rc - house keeping */
590             if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
591             {
592                 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
593             }
594             else if (i4_stuff_bytes)
595             {
596                 /* add filler nal units */
597                 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
598             }
599         }
600 
601         /*
602          *Frame number is to be incremented only if the current frame is a
603          * reference frame. After each successful frame encode, we increment
604          * frame number by 1
605          */
606         if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
607                         && ps_codec->u4_is_curr_frm_ref)
608         {
609             ps_codec->i4_frame_num++;
610         }
611         /********************************************************************/
612         /*      signal the output                                           */
613         /********************************************************************/
614         ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
615                         ps_entropy->ps_bitstrm->u4_strm_buf_offset;
616 
617         DEBUG("entropy status %x", ps_entropy->i4_error_code);
618     }
619 
620     /* allow threads to dequeue entropy jobs */
621     ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
622 
623     return ps_entropy->i4_error_code;
624 }
625 
626 /**
627 *******************************************************************************
628 *
629 * @brief Packs header information of a mb in to a buffer
630 *
631 * @par Description:
632 *  After the deciding the mode info of a macroblock, the syntax elements
633 *  associated with the mb are packed and stored. The entropy thread unpacks
634 *  this buffer and generates the end bit stream.
635 *
636 * @param[in] ps_proc
637 *  Pointer to the current process context
638 *
639 * @returns error status
640 *
641 * @remarks none
642 *
643 *******************************************************************************
644 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)645 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
646 {
647     /* curr mb type */
648     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
649 
650     /* pack mb syntax layer of curr mb (used for entropy coding) */
651     if (u4_mb_type == I4x4)
652     {
653         /* pointer to mb header storage space */
654         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
655 
656         /* temp var */
657         WORD32 i4, byte;
658 
659         /* mb type plus mode */
660         *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
661 
662         /* cbp */
663         *pu1_ptr++ = ps_proc->u4_cbp;
664 
665         /* mb qp delta */
666         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
667 
668         /* sub mb modes */
669         for (i4 = 0; i4 < 16; i4 ++)
670         {
671             byte = 0;
672 
673             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
674                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
675             {
676                 byte |= 1;
677             }
678             else
679             {
680 
681                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
682                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
683                 {
684                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
685                 }
686                 else
687                 {
688                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
689                 }
690             }
691 
692             i4++;
693 
694             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
695                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
696             {
697                 byte |= 16;
698             }
699             else
700             {
701 
702                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
703                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
704                 {
705                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
706                 }
707                 else
708                 {
709                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
710                 }
711             }
712 
713             *pu1_ptr++ = byte;
714         }
715 
716         /* end of mb layer */
717         ps_proc->pv_mb_header_data = pu1_ptr;
718     }
719     else if (u4_mb_type == I16x16)
720     {
721         /* pointer to mb header storage space */
722         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
723 
724         /* mb type plus mode */
725         *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
726 
727         /* cbp */
728         *pu1_ptr++ = ps_proc->u4_cbp;
729 
730         /* mb qp delta */
731         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
732 
733         /* end of mb layer */
734         ps_proc->pv_mb_header_data = pu1_ptr;
735     }
736     else if (u4_mb_type == P16x16)
737     {
738         /* pointer to mb header storage space */
739         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
740 
741         WORD16 *i2_mv_ptr;
742 
743         /* mb type plus mode */
744         *pu1_ptr++ = u4_mb_type;
745 
746         /* cbp */
747         *pu1_ptr++ = ps_proc->u4_cbp;
748 
749         /* mb qp delta */
750         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
751 
752         i2_mv_ptr = (WORD16 *)pu1_ptr;
753 
754         *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
755 
756         *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
757 
758         /* end of mb layer */
759         ps_proc->pv_mb_header_data = i2_mv_ptr;
760     }
761     else if (u4_mb_type == PSKIP)
762     {
763         /* pointer to mb header storage space */
764         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
765 
766         /* mb type plus mode */
767         *pu1_ptr++ = u4_mb_type;
768 
769         /* end of mb layer */
770         ps_proc->pv_mb_header_data = pu1_ptr;
771     }
772     else if(u4_mb_type == B16x16)
773     {
774 
775         /* pointer to mb header storage space */
776         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
777 
778         WORD16 *i2_mv_ptr;
779 
780         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
781 
782         /* mb type plus mode */
783         *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
784 
785         /* cbp */
786         *pu1_ptr++ = ps_proc->u4_cbp;
787 
788         /* mb qp delta */
789         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
790 
791         /* l0 & l1 me data */
792         i2_mv_ptr = (WORD16 *)pu1_ptr;
793 
794         if (u4_pred_mode != PRED_L1)
795         {
796             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
797                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
798 
799             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
800                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
801         }
802         if (u4_pred_mode != PRED_L0)
803         {
804             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
805                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
806 
807             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
808                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
809         }
810 
811         /* end of mb layer */
812         ps_proc->pv_mb_header_data = i2_mv_ptr;
813 
814     }
815     else if(u4_mb_type == BDIRECT)
816     {
817         /* pointer to mb header storage space */
818         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
819 
820         /* mb type plus mode */
821         *pu1_ptr++ = u4_mb_type;
822 
823         /* cbp */
824         *pu1_ptr++ = ps_proc->u4_cbp;
825 
826         /* mb qp delta */
827         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
828 
829         ps_proc->pv_mb_header_data = pu1_ptr;
830 
831     }
832     else if(u4_mb_type == BSKIP)
833     {
834         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
835 
836         /* pointer to mb header storage space */
837         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
838 
839         /* mb type plus mode */
840         *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
841 
842         /* end of mb layer */
843         ps_proc->pv_mb_header_data = pu1_ptr;
844     }
845 
846     return IH264E_SUCCESS;
847 }
848 
849 /**
850 *******************************************************************************
851 *
852 * @brief   update process context after encoding an mb. This involves preserving
853 * the current mb information for later use, initialize the proc ctxt elements to
854 * encode next mb.
855 *
856 * @par Description:
857 *  This function performs house keeping tasks after encoding an mb.
858 *  After encoding an mb, various elements of the process context needs to be
859 *  updated to encode the next mb. For instance, the source, recon and reference
860 *  pointers, mb indices have to be adjusted to the next mb. The slice index of
861 *  the current mb needs to be updated. If mb qp modulation is enabled, then if
862 *  the qp changes the quant param structure needs to be updated. Also to encoding
863 *  the next mb, the current mb info is used as part of mode prediction or mv
864 *  prediction. Hence the current mb info has to preserved at top/top left/left
865 *  locations.
866 *
867 * @param[in] ps_proc
868 *  Pointer to the current process context
869 *
870 * @returns none
871 *
872 * @remarks none
873 *
874 *******************************************************************************
875 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)876 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
877 {
878     /* error status */
879     WORD32 error_status = IH264_SUCCESS;
880 
881     /* codec context */
882     codec_t *ps_codec = ps_proc->ps_codec;
883 
884     /* curr mb indices */
885     WORD32 i4_mb_x = ps_proc->i4_mb_x;
886     WORD32 i4_mb_y = ps_proc->i4_mb_y;
887 
888     /* mb syntax elements of neighbors */
889     mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
890     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
891     mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
892 
893     /* curr mb type */
894     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
895 
896     /* curr mb type */
897     UWORD32 u4_is_intra = ps_proc->u4_is_intra;
898 
899     /* width in mbs */
900     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
901 
902     /*height in mbs*/
903     WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
904 
905     /* proc map */
906     UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
907 
908     /* deblk context */
909     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
910 
911     /* deblk bs context */
912     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
913 
914     /* top row motion vector info */
915     enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
916 
917     /* top left mb motion vector */
918     enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
919 
920     /* left mb motion vector */
921     enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
922 
923     /* sub mb modes */
924     UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
925 
926     /*************************************************************/
927     /* During MV prediction, when top right mb is not available, */
928     /* top left mb info. is used for prediction. Hence the curr  */
929     /* top, which will be top left for the next mb needs to be   */
930     /* preserved before updating it with curr mb info.           */
931     /*************************************************************/
932 
933     /* mb type, mb class, csbp */
934     *ps_top_left_syn = *ps_top_syn;
935 
936     if (ps_proc->i4_slice_type != ISLICE)
937     {
938         /*****************************************/
939         /* update top left with top info results */
940         /*****************************************/
941         /* mv */
942         *ps_top_left_mb_pu = *ps_top_row_pu;
943     }
944 
945     /*************************************************/
946     /* update top and left with curr mb info results */
947     /*************************************************/
948 
949     /* mb type */
950     ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
951 
952     /* mb class */
953     ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
954 
955     /* csbp */
956     ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
957 
958     /* distortion */
959     ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
960 
961     if (u4_is_intra)
962     {
963         /* mb / sub mb modes */
964         if (I16x16 == u4_mb_type)
965         {
966             pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
967         }
968         else if (I4x4 == u4_mb_type)
969         {
970             ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
971             ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
972         }
973         else if (I8x8 == u4_mb_type)
974         {
975             memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
976             memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
977         }
978 
979         if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
980         {
981             /* mv */
982             *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
983         }
984 
985         *ps_proc->pu4_mb_pu_cnt = 1;
986     }
987     else
988     {
989         /* mv */
990         *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
991     }
992 
993     /*
994      * Mark that the MB has been coded intra
995      * So that future AIRs can skip it
996      */
997     ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
998 
999     /**************************************************/
1000     /* pack mb header info. for entropy coding        */
1001     /**************************************************/
1002     ih264e_pack_header_data(ps_proc);
1003 
1004     /* update previous mb qp */
1005     ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1006 
1007     /* store qp */
1008     ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1009 
1010     /*
1011      * We need to sync the cache to make sure that the nmv content of proc
1012      * is updated to cache properly
1013      */
1014     DATA_SYNC();
1015 
1016     /* Just before finishing the row, enqueue the job in to entropy queue.
1017      * The master thread depending on its convenience shall dequeue it and
1018      * performs entropy.
1019      *
1020      * WARN !! Placing this block post proc map update can cause queuing of
1021      * entropy jobs in out of order.
1022      */
1023     if (i4_mb_x == i4_wd_mbs - 1)
1024     {
1025         /* job structures */
1026         job_t s_job;
1027 
1028         /* job class */
1029         s_job.i4_cmd = CMD_ENTROPY;
1030 
1031         /* number of mbs to be processed in the current job */
1032         s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1033 
1034         /* job start index x */
1035         s_job.i2_mb_x = 0;
1036 
1037         /* job start index y */
1038         s_job.i2_mb_y = ps_proc->i4_mb_y;
1039 
1040         /* proc base idx */
1041         s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1042 
1043         /* queue the job */
1044         error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1045 
1046         if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1047             ih264_list_terminate(ps_codec->pv_entropy_jobq);
1048     }
1049 
1050     /* update proc map */
1051     pu1_proc_map[i4_mb_x] = 1;
1052 
1053     /**************************************************/
1054     /* update proc ctxt elements for encoding next mb */
1055     /**************************************************/
1056     /* update indices */
1057     i4_mb_x ++;
1058     ps_proc->i4_mb_x = i4_mb_x;
1059 
1060     if (ps_proc->i4_mb_x == i4_wd_mbs)
1061     {
1062         ps_proc->i4_mb_y++;
1063         ps_proc->i4_mb_x = 0;
1064     }
1065 
1066     /* update slice index */
1067     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1068 
1069     /* update buffers pointers */
1070     ps_proc->pu1_src_buf_luma += MB_SIZE;
1071     ps_proc->pu1_rec_buf_luma += MB_SIZE;
1072     ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1073     ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1074 
1075     /*
1076      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1077      * the stride per MB is MB_SIZE
1078      */
1079     ps_proc->pu1_src_buf_chroma += MB_SIZE;
1080     ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1081     ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1082     ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1083 
1084 
1085 
1086     /* Reset cost, distortion params */
1087     ps_proc->i4_mb_cost = INT_MAX;
1088     ps_proc->i4_mb_distortion = SHRT_MAX;
1089 
1090     ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1091 
1092     ps_proc->pu4_mb_pu_cnt += 1;
1093 
1094     /* Update colocated pu */
1095     if (ps_proc->i4_slice_type == BSLICE)
1096         ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1097 
1098     /* deblk ctxts */
1099     if (ps_proc->u4_disable_deblock_level != 1)
1100     {
1101         /* indices */
1102         ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1103         ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1104 
1105 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1106         ps_deblk->i4_mb_x ++;
1107 
1108         ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1109         /*
1110          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1111          * the stride per MB is MB_SIZE
1112          */
1113         ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1114 #endif
1115     }
1116 
1117     return error_status;
1118 }
1119 
1120 /**
1121 *******************************************************************************
1122 *
1123 * @brief   initialize process context.
1124 *
1125 * @par Description:
1126 *  Before dispatching the current job to process thread, the process context
1127 *  associated with the job is initialized. Usually every job aims to encode one
1128 *  row of mb's. Basing on the row indices provided by the job, the process
1129 *  context's buffer ptrs, slice indices and other elements that are necessary
1130 *  during core-coding are initialized.
1131 *
1132 * @param[in] ps_proc
1133 *  Pointer to the current process context
1134 *
1135 * @returns error status
1136 *
1137 * @remarks none
1138 *
1139 *******************************************************************************
1140 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1141 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1142 {
1143     /* codec context */
1144     codec_t *ps_codec = ps_proc->ps_codec;
1145 
1146     /* nmb processing context*/
1147     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1148 
1149     /* indices */
1150     WORD32 i4_mb_x, i4_mb_y;
1151 
1152     /* strides */
1153     WORD32 i4_src_strd = ps_proc->i4_src_strd;
1154     WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1155     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1156 
1157     /* quant params */
1158     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1159 
1160     /* deblk ctxt */
1161     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1162 
1163     /* deblk bs context */
1164     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1165 
1166     /* Pointer to mv_buffer of current frame */
1167     mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1168 
1169     /* Pointers for color space conversion */
1170     UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1171 
1172     /* Pad the MB to support non standard sizes */
1173     UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1174     UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1175     UWORD16 u2_num_rows = MB_SIZE;
1176     WORD32 convert_uv_only;
1177 
1178     /********************************************************************/
1179     /*                            BEGIN INIT                            */
1180     /********************************************************************/
1181 
1182     i4_mb_x = ps_proc->i4_mb_x;
1183     i4_mb_y = ps_proc->i4_mb_y;
1184 
1185     /* Number of mbs processed in one loop of process function */
1186     ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1187     ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1188 
1189     /* init buffer pointers */
1190     convert_uv_only = 1;
1191     if (u4_pad_bottom_sz || u4_pad_right_sz ||
1192         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1193     {
1194         if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1195             u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1196         ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1197         i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1198         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1199         convert_uv_only = 0;
1200     }
1201     else
1202     {
1203         i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1204         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1205     }
1206 
1207 
1208     if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1209         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1210         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1211         u4_pad_bottom_sz || u4_pad_right_sz)
1212     {
1213         if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1214             (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1215             ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1216 
1217         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1218         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1219     }
1220     else
1221     {
1222         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1223         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1224     }
1225 
1226     ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1227     ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1228 
1229     /* Tempral back and forward reference buffer */
1230     ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1231     ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1232     ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1233     ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1234 
1235     /*
1236      * Do color space conversion
1237      * NOTE : We assume there that the number of MB's to process will not span multiple rows
1238      */
1239     switch (ps_codec->s_cfg.e_inp_color_fmt)
1240     {
1241         case IV_YUV_420SP_UV:
1242         case IV_YUV_420SP_VU:
1243             /* In case of 420 semi-planar input, copy last few rows to intermediate
1244                buffer as chroma trans functions access one extra byte due to interleaved input.
1245                This data will be padded if required */
1246             if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1247             {
1248                 WORD32 num_rows = MB_SIZE;
1249                 UWORD8 *pu1_src;
1250                 UWORD8 *pu1_dst;
1251                 WORD32 i;
1252                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1253                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1254 
1255                 pu1_dst = ps_proc->pu1_src_buf_luma;
1256 
1257                 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
1258                 if (u4_pad_bottom_sz || u4_pad_right_sz) {
1259                     if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1260                         num_rows = MB_SIZE - u4_pad_bottom_sz;
1261                     for (i = 0; i < num_rows; i++)
1262                     {
1263                         memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1264                         pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1265                         pu1_dst += ps_proc->i4_src_strd;
1266                     }
1267                 }
1268                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1269                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1270                 pu1_dst = ps_proc->pu1_src_buf_chroma;
1271 
1272                 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1273                  * due to interleaved input
1274                  */
1275                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1276                     num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1277                 else
1278                     num_rows = BLK8x8SIZE;
1279                 for (i = 0; i < num_rows; i++)
1280                 {
1281                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1282                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1283                     pu1_dst += ps_proc->i4_src_chroma_strd;
1284                 }
1285 
1286             }
1287             break;
1288 
1289         case IV_YUV_420P :
1290             pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1291                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1292 
1293             pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1294                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1295 
1296             pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1297                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1298 
1299             ps_codec->pf_ih264e_conv_420p_to_420sp(
1300                             pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1301                             ps_proc->pu1_src_buf_luma,
1302                             ps_proc->pu1_src_buf_chroma, u2_num_rows,
1303                             ps_codec->s_cfg.u4_disp_wd,
1304                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1305                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1306                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1307                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1308                             convert_uv_only);
1309             break;
1310 
1311         case IV_YUV_422ILE :
1312             pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1313                               + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1314 
1315             ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1316                             ps_proc->pu1_src_buf_luma,
1317                             ps_proc->pu1_src_buf_chroma,
1318                             ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1319                             ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1320                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1321                             ps_proc->i4_src_chroma_strd,
1322                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1323             break;
1324 
1325         default:
1326             break;
1327     }
1328 
1329     if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1330     {
1331         UWORD32 u4_pad_wd, u4_pad_ht;
1332         u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1333         u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1334         u4_pad_ht = MB_SIZE;
1335         if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1336             u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1337 
1338         ih264_pad_right_luma(
1339                         ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1340                         ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1341 
1342         ih264_pad_right_chroma(
1343                         ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1344                         ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1345     }
1346 
1347     /* pad bottom edge */
1348     if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1349     {
1350         ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1351                          ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1352 
1353         ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1354                          ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1355     }
1356 
1357 
1358     /* packed mb coeff data */
1359     ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1360 
1361     /* packed mb header data */
1362     ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1363 
1364     /* slice index */
1365     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1366 
1367     /*********************************************************************/
1368     /* ih264e_init_quant_params() routine is called at the pic init level*/
1369     /* this would have initialized the qp.                               */
1370     /* TODO_LATER: currently it is assumed that quant params donot change*/
1371     /* across mb's. When they do calculate update ps_qp_params accordingly*/
1372     /*********************************************************************/
1373 
1374     /* init mv buffer ptr */
1375     ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1376                      ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1377 
1378     /* Init co-located mv buffer */
1379     ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1380                         ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1381 
1382     if (i4_mb_y == 0)
1383     {
1384         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1385     }
1386     else
1387     {
1388         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1389                                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1390     }
1391 
1392     ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1393 
1394     /* mb type */
1395     ps_proc->u4_mb_type = I16x16;
1396 
1397     /* lambda */
1398     ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1399 
1400     /* mb distortion */
1401     ps_proc->i4_mb_distortion = SHRT_MAX;
1402 
1403     if (i4_mb_x == 0)
1404     {
1405         ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1406 
1407         ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1408 
1409         ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1410 
1411         if (i4_mb_y == 0)
1412         {
1413             memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1414         }
1415     }
1416 
1417     /* mb cost */
1418     ps_proc->i4_mb_cost = INT_MAX;
1419 
1420     /**********************/
1421     /* init deblk context */
1422     /**********************/
1423     ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1424     /* deblk lags the current mb proc by 1 row */
1425     /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1426     /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1427     /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1428     ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1429 
1430     /* buffer ptrs */
1431     ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1432     ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1433 
1434     /* init deblk bs context */
1435     /* mb indices */
1436     ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1437     ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1438 
1439     /* init n_mb_process  context */
1440     ps_n_mb_ctxt->i4_mb_x = 0;
1441     ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1442     ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1443 
1444     return IH264E_SUCCESS;
1445 }
1446 
1447 /**
1448 *******************************************************************************
1449 *
1450 * @brief This function performs luma & chroma padding
1451 *
1452 * @par Description:
1453 *
1454 * @param[in] ps_proc
1455 *  Process context corresponding to the job
1456 *
1457 * @param[in] pu1_curr_pic_luma
1458 *  Pointer to luma buffer
1459 *
1460 * @param[in] pu1_curr_pic_chroma
1461 *  Pointer to chroma buffer
1462 *
1463 * @param[in] i4_mb_x
1464 *  mb index x
1465 *
1466 * @param[in] i4_mb_y
1467 *  mb index y
1468 *
1469 *  @param[in] i4_pad_ht
1470 *  number of rows to be padded
1471 *
1472 * @returns  error status
1473 *
1474 * @remarks none
1475 *
1476 *******************************************************************************
1477 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1478 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1479                                        UWORD8 *pu1_curr_pic_luma,
1480                                        UWORD8 *pu1_curr_pic_chroma,
1481                                        WORD32 i4_mb_x,
1482                                        WORD32 i4_mb_y,
1483                                        WORD32 i4_pad_ht)
1484 {
1485     /* codec context */
1486     codec_t *ps_codec = ps_proc->ps_codec;
1487 
1488     /* strides */
1489     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1490 
1491     if (i4_mb_x == 0)
1492     {
1493         /* padding left luma */
1494         ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1495 
1496         /* padding left chroma */
1497         ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1498     }
1499     if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1500     {
1501         /* padding right luma */
1502         ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1503 
1504         /* padding right chroma */
1505         ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1506 
1507         if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1508         {
1509             UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1510             UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1511 
1512             /* padding bottom luma */
1513             ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1514 
1515             /* padding bottom chroma */
1516             ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1517         }
1518     }
1519 
1520     if (i4_mb_y == 0)
1521     {
1522         UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1523         UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1524         WORD32 wd = MB_SIZE;
1525 
1526         if (i4_mb_x == 0)
1527         {
1528             pu1_rec_luma -= PAD_LEFT;
1529             pu1_rec_chroma -= PAD_LEFT;
1530 
1531             wd += PAD_LEFT;
1532         }
1533         if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1534         {
1535             wd += PAD_RIGHT;
1536         }
1537 
1538         /* padding top luma */
1539         ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1540 
1541         /* padding top chroma */
1542         ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1543     }
1544 
1545     return IH264E_SUCCESS;
1546 }
1547 
1548 
1549 
1550 
1551 /**
1552 *******************************************************************************
1553 *
1554 * @brief This function performs deblocking, padding and halfpel generation for
1555 *  'n' MBs
1556 *
1557 * @par Description:
1558 *
1559 * @param[in] ps_proc
1560 *  Process context corresponding to the job
1561 *
1562 * @param[in] pu1_curr_pic_luma
1563 * Current MB being processed(Luma)
1564 *
1565 * @param[in] pu1_curr_pic_chroma
1566 * Current MB being processed(Chroma)
1567 *
1568 * @param[in] i4_mb_x
1569 * Column value of current MB processed
1570 *
1571 * @param[in] i4_mb_y
1572 * Current row processed
1573 *
1574 * @returns  error status
1575 *
1576 * @remarks none
1577 *
1578 *******************************************************************************
1579 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1580 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1581                                                      UWORD8 *pu1_curr_pic_luma,
1582                                                      UWORD8 *pu1_curr_pic_chroma,
1583                                                      WORD32 i4_mb_x,
1584                                                      WORD32 i4_mb_y)
1585 {
         /*
          * Overview (derived from the body below):
          *  - When deblocking is disabled (u4_disable_deblock_level == 1) the
          *    left / right picture edges of the current MB are padded right away
          *    if the MB sits in the first / last column.
          *  - For rows after the first (or for a single-row picture) MBs are
          *    handled in groups of up to ps_n_mb_ctxt->i4_n_mbs: the call returns
          *    early until a full group, or the end of the row, has been reached.
          *    Then, if deblocking is enabled, the MBs tracked by ps_deblk are
          *    deblocked and the left / right edges of the row above them padded;
          *    the top edge is padded while row 2 is being processed.
          *  - On the last MB of the last row the whole last row is deblocked
          *    (if enabled) and the remaining left / right / top / bottom padding
          *    is completed.
          * Every return statement in this function yields IH264E_SUCCESS.
          * NOTE(review): despite "hpel" in the name, no half-pel step is visible
          * in this body.
          */
1586     /* codec context */
1587     codec_t *ps_codec = ps_proc->ps_codec;
1588 
1589     /* n_mb processing context */
1590     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1591 
1592     /* deblk context */
1593     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1594 
1595     /* strides */
1596     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1597 
1598     /* loop variables */
1599     WORD32 row, i, j, col;
1600 
1601     /* Padding Width */
1602     UWORD32 u4_pad_wd;
1603 
1604     /* deblk_map of the row being deblocked */
         /* (indexed with ps_deblk->i4_mb_y, the deblock context's own row, not
          * with the i4_mb_y argument) */
1605     UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1606 
1607     /* deblk_map_previous row */
         /* NOTE(review): when ps_deblk->i4_mb_y is 0 this points i4_wd_mbs entries
          * before row 0 of the map; presumably the map is allocated with a
          * leading row -- confirm against the allocation code. */
1608     UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1609 
         /* horizontal byte offset (from the recon base) at which top padding
          * starts; signed despite the u4_ prefix because it is set to -PAD_LEFT
          * below */
1610     WORD32 u4_pad_top = 0;
1611 
         /* AND of the deblk_map flags of the previous row for the columns about
          * to be deblocked (plus the next column) */
1612     WORD32 u4_deblk_prev_row = 0;
1613 
1614     /* Number of mbs to be processed */
1615     WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1616 
1617     /* Number of mbs  actually processed
1618      * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1619     WORD32 i4_n_mb_process_count = 0;
1620 
1621     UWORD8 *pu1_pad_bottom_src = NULL;
1622 
1623     UWORD8 *pu1_pad_src_luma = NULL;
1624     UWORD8 *pu1_pad_src_chroma = NULL;
1625 
         /* Deblocking disabled: nothing will modify the MB later in this
          * function, so the picture edges next to it are padded immediately. */
1626     if (ps_proc->u4_disable_deblock_level == 1)
1627     {
1628         /* If left most MB is processed, then pad left */
1629         if (i4_mb_x == 0)
1630         {
1631             /* padding left luma */
1632             ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1633 
1634             /* padding left chroma */
1635             ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1636         }
1637         /*last col*/
1638         if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1639         {
1640             /* padding right luma */
1641             ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1642 
1643             /* padding right chroma */
                 /* chroma pointer is also offset by MB_SIZE bytes -- presumably
                  * because U and V are interleaved; confirm */
1644             ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1645         }
1646     }
1647 
         /* For non-negative i4_mb_y this skips row 0 unless the picture is a
          * single MB row high (i4_ht_mbs == 1). */
1648     if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1649     {
1650         /* if number of mb's to be processed are less than 'N', go back.
1651          * exception to the above clause is end of row */
             /* (i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) is the number of MBs from
              * ps_n_mb_ctxt->i4_mb_x up to and including i4_mb_x */
1652         if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1653         {
1654             return IH264E_SUCCESS;
1655         }
1656         else
1657         {
                 /* size of this group: pending MBs, capped at i4_n_mbs */
1658             i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1659 
1660             /* performing deblocking for required number of MBs */
1661             if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1662             {
1663                 u4_deblk_prev_row = 1;
1664 
1665                 /* checking whether the top rows are deblocked */
1666                 for (col = 0; col < i4_n_mb_process_count; col++)
1667                 {
1668                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1669                 }
1670 
1671                 /* checking whether the top right MB is deblocked */
                     /* (skipped when the group ends at the last column, where no
                      * top right MB exists) */
1672                 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1673                 {
1674                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1675                 }
1676 
1677                 /* Top or Top right MBs not deblocked */
                     /* Returns without advancing ps_deblk or ps_n_mb_ctxt, so the
                      * same group is retried on a later call. The (i4_mb_y > 0)
                      * test is redundant inside this branch. */
1678                 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1679                 {
1680                     return IH264E_SUCCESS;
1681                 }
1682 
                     /* 'row' is only a counter here: it steps over the MBs of the
                      * group along the row, not over picture rows */
1683                 for (row = 0; row < i4_n_mb_process_count; row++)
1684                 {
1685                     ih264e_deblock_mb(ps_proc, ps_deblk);
1686 
                         /* mark this MB as deblocked */
1687                     pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1688 
                         /* Pad the edges of the MB row located one MB height above
                          * ps_deblk's current pointers (MB_SIZE luma lines,
                          * BLK8x8SIZE chroma lines up). */
1689                     if (ps_deblk->i4_mb_y > 0)
1690                     {
1691                         if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1692                         {
1693                             /* padding left luma */
1694                             ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1695 
1696                             /* padding left chroma */
1697                             ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1698                         }
1699 
1700                         if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1701                         {
1702                             /* padding right luma */
1703                             ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1704 
1705                             /* padding right chroma */
1706                             ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1707                         }
1708                     }
                         /* advance the deblock context to the next MB in the row */
1709                     ps_deblk->i4_mb_x++;
1710 
1711                     ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1712                     ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1713 
1714                 }
1715             }
                 /* Reached with i4_mb_y > 0 only when deblocking is disabled: the
                  * deblock context is advanced past the group without filtering. */
1716             else if(i4_mb_y > 0)
1717             {
1718                 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1719 
1720                 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1721                 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1722             }
1723 
                 /* Top padding is issued group by group while row 2 is processed.
                  * Pictures with i4_ht_mbs <= 2 are handled separately further
                  * down. */
1724             if (i4_mb_y == 2)
1725             {
                     /* width and start offset (in bytes) of the strip to pad */
1726                 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1727                 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1728 
                     /* first group of the row: extend the strip over the left pad */
1729                 if (ps_n_mb_ctxt->i4_mb_x == 0)
1730                 {
1731                     u4_pad_wd += PAD_LEFT;
1732                     u4_pad_top = -PAD_LEFT;
1733                 }
1734 
                     /* last group of the row: extend the strip over the right pad */
1735                 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1736                 {
1737                     u4_pad_wd += PAD_RIGHT;
1738                 }
1739 
1740                 /* padding top luma */
1741                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1742 
1743                 /* padding top chroma */
                     /* same offset and width as luma, half the pad height */
1744                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1745             }
1746 
                 /* the group is done: move the n-MB context past it */
1747             ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1748 
                 /* End of row. Note that the row test below uses ps_proc->i4_mb_y
                  * rather than the i4_mb_y argument. */
1749             if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1750             {
1751                 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1752                 {
1753                     /* Bottom Padding is done in one stretch for the entire width */
1754                     if (ps_proc->u4_disable_deblock_level != 1)
1755                     {
                             /* Re-point the deblock context at the first MB of the
                              * last row: no later row exists to trigger its
                              * deblocking, so it is done here. */
1756                         ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1757 
1758                         ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1759 
1760                         ps_n_mb_ctxt->i4_mb_x = 0;
1761                         ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1762                         ps_deblk->i4_mb_x = 0;
1763                         ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1764 
1765                         /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1766                         ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1767 
                             /* j full groups of i4_n_mbs plus a remainder group:
                              * together they cover all i4_wd_mbs MBs of the row */
1768                         i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1769 
1770                         j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1771 
1772                         for (i = 0; i < j; i++)
1773                         {
1774                             for (col = 0; col < i4_n_mbs; col++)
1775                             {
1776                                 ih264e_deblock_mb(ps_proc, ps_deblk);
1777 
1778                                 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1779 
1780                                 ps_deblk->i4_mb_x++;
1781                                 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1782                                 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1783                                 ps_n_mb_ctxt->i4_mb_x++;
1784                             }
1785                         }
1786 
                             /* remainder group */
1787                         for (col = 0; col < i4_n_mb_process_count; col++)
1788                         {
1789                             ih264e_deblock_mb(ps_proc, ps_deblk);
1790 
1791                             pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1792 
1793                             ps_deblk->i4_mb_x++;
1794                             ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1795                             ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1796                             ps_n_mb_ctxt->i4_mb_x++;
1797                         }
1798 
                             /* Left padding of the last two MB rows: start at row
                              * (i4_ht_mbs - 2), then step down one MB row.
                              * NOTE(review): with i4_ht_mbs == 1 the first address
                              * lies one MB row above the recon base -- presumably
                              * inside the top pad region; confirm. */
1799                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1800 
1801                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1802 
1803                         /* padding left luma */
1804                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1805 
1806                         /* padding left chroma */
1807                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1808 
1809                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1810                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1811 
1812                         /* padding left luma */
1813                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1814 
1815                         /* padding left chroma */
1816                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1817 
                             /* Right padding of the same two MB rows: the source
                              * is offset by the picture width (i4_wd_mbs * MB_SIZE) */
1818                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1819 
1820                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1821 
1822                         /* padding right luma */
1823                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1824 
1825                         /* padding right chroma */
1826                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1827 
1828                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1829                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1830 
1831                         /* padding right luma */
1832                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1833 
1834                         /* padding right chroma */
1835                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1836 
1837                     }
1838 
1839                     /* In case height is at most 2 MBs pad top here: the (i4_mb_y == 2) trigger above cannot fire while i4_mb_y stays below i4_ht_mbs */
1840                     if (ps_proc->i4_ht_mbs <= 2)
1841                     {
1842                         UWORD8 *pu1_pad_top_src;
1843                         /* padding top luma */
                             /* starts PAD_LEFT bytes left of the base and spans a
                              * full stride */
1844                         pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1845                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1846 
1847                         /* padding top chroma */
1848                         pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1849                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1850                     }
1851 
                         /* Bottom padding runs whether or not deblocking is
                          * enabled. The source address is the line at offset
                          * i4_ht_mbs * MB_SIZE (luma) or i4_ht_mbs * (MB_SIZE >> 1)
                          * (chroma) from the base, shifted left by PAD_LEFT; the
                          * width passed is a full stride. */
1852                     /* padding bottom luma */
1853                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1854                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1855 
1856                     /* padding bottom chroma */
1857                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1858                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1859                 }
1860             }
1861         }
1862     }
1863 
1864     return IH264E_SUCCESS;
1865 }
1866 
1867 
1868 /**
1869 *******************************************************************************
1870 *
1871 * @brief This function performs luma & chroma core coding for a set of mb's.
1872 *
1873 * @par Description:
1874 *  The mb to be coded is taken and is evaluated over a predefined set of modes
1875 *  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1876 *  is selected and using intra/inter prediction filters, prediction is carried out.
1877 *  The deviation between src and pred signal constitutes error signal. This error
1878 *  signal is transformed (hierarchical transform if necessary) and quantized. The
1879 *  quantized residue is packed in to entropy buffer for entropy coding. This is
1880 *  repeated for all the mb's enlisted under the job.
     *
     *  Per macroblock the body below runs, in order: motion estimation and
     *  neighbour availability, optional intra mode evaluation, luma and chroma
     *  core coding, skip/direct detection, rate control statistics update,
     *  boundary strength + deblock/pad (only when recon is computed), and the
     *  process context update. After the context update, when
     *  ps_proc->i4_mb_y equals ps_proc->i4_ht_mbs, the mv and reference buffers
     *  are released and, if recon output is enabled, the recon buffer
     *  descriptor is filled in.
1881 *
1882 * @param[in] ps_proc
1883 *  Process context corresponding to the job
1884 *
1885 * @returns  error status
     *  Starts as IH264_SUCCESS; the return values of ih264e_update_proc_ctxt()
     *  and ih264_buf_mgr_release() are OR-ed into it.
1886 *
1887 * @remarks none
1888 *
1889 *******************************************************************************
1890 */
ih264e_process(process_ctxt_t * ps_proc)1891 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1892 {
1893     /* error status */
1894     WORD32 error_status = IH264_SUCCESS;
1895 
1896     /* codec context */
1897     codec_t *ps_codec = ps_proc->ps_codec;
1898 
1899     /* cbp luma, chroma */
1900     UWORD32 u4_cbp_l, u4_cbp_c;
1901 
1902     /* width in mbs */
1903     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1904 
1905     /* loop var */
1906     WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1907 
1908     /* valid modes */
         /* bit mask: bit (1 << <mode>) set means the mode may be evaluated */
1909     UWORD32 u4_valid_modes = 0;
1910 
1911     /* gate threshold */
1912     WORD32 i4_gate_threshold = 0;
1913 
1914     /* is intra */
         /* luma_idx / chroma_idx select the entry of the luma / chroma
          * energy-compaction function tables used for the current MB */
1915     WORD32 luma_idx, chroma_idx, is_intra;
1916 
1917     /* temp variables */
         /* selects the as_rec_buf[] entry written at the end of the frame */
1918     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1919 
1920     /*
1921      * list of modes for evaluation
1922      * -------------------------------------------------------------------------
1923      * Note on enabling I4x4 and I16x16
1924      * At very low QP's the hadamard transform in I16x16 will push up the maximum
1925      * coeff value very high. CAVLC may not be able to represent the value and
1926      * hence the stream may not be decodable in some clips.
1927      * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
          *
          * As coded below: with frame QP <= 10 the I4x4 bit is always set and
          * I16x16 (and I8x8) are never set. I8x8 is only considered for I slices.
          * In P and B slices the configured I4x4 switch is honoured only for the
          * IVE_SLOWEST preset.
1928      */
1929     if (ps_proc->i4_slice_type == ISLICE)
1930     {
1931         if (ps_proc->u4_frame_qp > 10)
1932         {
1933             /* enable intra 16x16 */
1934             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1935 
1936             /* enable intra 8x8 */
1937             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1938         }
1939 
1940         /* enable intra 4x4 */
1941         u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
             /* force I4x4 at low QP regardless of the configuration */
1942         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1943 
1944     }
1945     else if (ps_proc->i4_slice_type == PSLICE)
1946     {
1947         if (ps_proc->u4_frame_qp > 10)
1948         {
1949             /* enable intra 16x16 */
1950             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1951         }
1952 
1953         /* enable intra 4x4 */
1954         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1955         {
1956             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1957         }
1958         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1959 
1960         /* enable inter P16x16 */
1961         u4_valid_modes |= (1 << P16x16);
1962     }
1963     else if (ps_proc->i4_slice_type == BSLICE)
1964     {
1965         if (ps_proc->u4_frame_qp > 10)
1966         {
1967             /* enable intra 16x16 */
1968             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1969         }
1970 
1971         /* enable intra 4x4 */
1972         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1973         {
1974             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1975         }
1976         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1977 
1978         /* enable inter B16x16 */
1979         u4_valid_modes |= (1 << B16x16);
1980     }
1981 
1982 
1983     /* init entropy */
1984     ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1985     ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
         /* never more MBs than remain in the current row */
1986     ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1987 
1988     /* compute recon when :
1989      *   1. current frame is to be used as a reference
1990      *   2. dump recon for bit stream sanity check
1991      */
1992     ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
1993                                 ps_codec->s_cfg.u4_enable_recon;
1994 
1995     /* Encode 'n' macroblocks,
1996      * 'n' being the number of mbs dictated by current proc ctxt */
1997     for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
1998     {
1999         /* since we have not yet found sad, we have not yet got min sad */
2000         /* we need to initialize these variables for each MB */
2001         /* TODO how to get the min sad into the codec */
2002         ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2003         ps_proc->u4_min_sad_reached = 0;
2004 
2005         /* mb analysis */
2006         {
2007             /* temp var */
                 /* raster-scan index of the current MB */
2008             WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2009 
2010             /* force intra refresh ? */
                 /* inter stays allowed when AIR is off, or when the refresh map
                  * entry of this MB differs from the current AIR picture count */
2011             WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2012                             (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2013 
2014             /* evaluate inter 16x16 modes */
2015             if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2016             {
2017                 /* compute nmb me */
                     /* ME runs once per batch, at every column that is a multiple
                      * of u4_nmb_me, for at most the MBs left in the row.
                      * NOTE(review): the modulo assumes u4_nmb_me is non-zero. */
2018                 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2019                 {
2020                     ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2021                                                        i4_wd_mbs - ps_proc->i4_mb_x));
2022                 }
2023 
2024                 /* set pointers to ME data appropriately for other modules to use */
2025                 {
                         /* position of the current MB inside the ME batch */
2026                     UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2027 
2028                     /* get the min sad condition for current mb */
2029                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2030                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2031 
2032                     ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2033                     ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2034                     ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2035 
2036                     ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2037                     ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
                         /* the next two assignments repeat the ones made a few
                          * lines above with the same source values */
2038                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2039                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2040                     ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2041 
2042                     /* get the best sub pel buffer */
2043                     ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2044                     ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2045                 }
2046                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2047             }
2048             else
2049             {
2050                 /* Derive neighbor availability for the current macroblock */
                     /* no inter mode enabled: use the availability struct embedded
                      * in the process context */
2051                 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2052 
2053                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2054             }
2055 
2056             /*
2057              * If air says intra, we need to force the following code path to evaluate intra
2058              * The easy way is just to say that the inter cost is too much
2059              */
2060             if (!i4_air_enable_inter)
2061             {
2062                 ps_proc->u4_min_sad_reached = 0;
2063                 ps_proc->i4_mb_cost = INT_MAX;
2064                 ps_proc->i4_mb_distortion = INT_MAX;
2065             }
                 /* A PSKIP MB jumps straight to UPDATE_MB_INFO. This bypasses the
                  * top-row wait, intra evaluation, core coding and the writes to
                  * ps_proc->u4_is_intra, ps_proc->ps_pu->b1_intra_flag and
                  * ps_proc->u4_cbp further down. */
2066             else if (ps_proc->u4_mb_type == PSKIP)
2067             {
2068                 goto UPDATE_MB_INFO;
2069             }
2070 
2071             /* wait until the proc of [top + 1] mb is computed.
2072              * We wait till the proc dependencies are satisfied */
2073              if(ps_proc->i4_mb_y > 0)
2074              {
2075                 /* proc map */
2076                 UWORD8  *pu1_proc_map_top;
2077 
                     /* proc map row of the MB row above */
2078                 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2079 
                     /* Busy-wait, yielding the thread on each iteration, until the
                      * proc map byte of the row above at column
                      * min(i4_mb_idx + 1, s_cfg.i4_wd_mbs - 1) is non-zero. The
                      * byte is read through a volatile-qualified pointer.
                      * NOTE(review): the column comes from the loop counter
                      * i4_mb_idx, not from ps_proc->i4_mb_x; the two coincide only
                      * if the job starts at column 0 -- confirm against job setup.
                      * NOTE(review): no other synchronization is visible here. */
2080                 while (1)
2081                 {
2082                     volatile UWORD8 *pu1_buf;
2083                     WORD32 idx = i4_mb_idx + 1;
2084 
2085                     idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2086                     pu1_buf =  pu1_proc_map_top + idx;
2087                     if(*pu1_buf)
2088                         break;
2089                     ithread_yield();
2090                 }
2091             }
2092 
2093             /* If we already have the minimum sad, there is no point in searching for sad again */
2094             if (ps_proc->u4_min_sad_reached == 0)
2095             {
2096                 /* intra gating in inter slices */
2097                 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2098                 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2099                 {
2100                     /* distortion of neighboring blocks */
                         /* order: left, top, top-right, top-left */
2101                     WORD32 i4_distortion[4];
2102 
2103                     i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2104 
2105                     i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2106 
                         /* NOTE(review): for the last column this reads the entry
                          * at index i4_wd_mbs -- confirm the array has room for it */
2107                     i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2108 
2109                     i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2110 
                         /* threshold = average of the four neighbour distortions */
2111                     i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2112 
2113                 }
2114 
2115 
2116                 /* If we are going to force intra we need to evaluate intra irrespective of gating */
                     /* otherwise intra is evaluated only when the inter distortion
                      * exceeds the gate threshold plus 16 * lambda */
2117                 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2118                 {
2119                     /* evaluate intra 4x4 modes */
2120                     if (u4_valid_modes & (1 << I4x4))
2121                     {
2122                         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2123                         {
2124                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2125                         }
2126                         else
2127                         {
2128                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2129                         }
2130                     }
2131 
2132                     /* evaluate intra 16x16 modes */
2133                     if (u4_valid_modes & (1 << I16x16))
2134                     {
2135                         ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2136                     }
2137 
2138                     /* evaluate intra 8x8 modes */
2139                     if (u4_valid_modes & (1 << I8x8))
2140                     {
2141                         ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2142                     }
2143 
2144                 }
                 /* the next brace closes: if (ps_proc->u4_min_sad_reached == 0) */
2145         }
             /* the next brace closes the "mb analysis" block */
2146      }
2147 
2148         /* is intra */
             /* For intra MBs the MB type value itself is used as the index into
              * the luma energy-compaction table; inter MBs use entry 3. The
              * chroma table has entry 0 for intra and entry 1 for inter. */
2149         if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2150         {
2151             luma_idx = ps_proc->u4_mb_type;
2152             chroma_idx = 0;
2153             is_intra = 1;
2154 
2155             /* evaluate chroma blocks for intra */
2156             ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2157         }
2158         else
2159         {
2160             luma_idx = 3;
2161             chroma_idx = 1;
2162             is_intra = 0;
2163         }
2164         ps_proc->u4_is_intra = is_intra;
2165         ps_proc->ps_pu->b1_intra_flag = is_intra;
2166 
2167         /* redo MV pred of neighbors in the case intra mb */
2168         /* TODO : currently called unconditionally, needs to be called only in the case of intra
2169          * to modify neighbors */
2170         if (ps_proc->i4_slice_type != ISLICE)
2171         {
2172             ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2173         }
2174 
2175         /* Perform luma mb core coding */
2176         u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2177 
2178         /* Perform chroma mb core coding */
2179         u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2180 
2181         /* coded block pattern */
             /* chroma cbp occupies the bits from bit 4 upwards, luma cbp the bits
              * below */
2182         ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2183 
             /* Skip / direct detection for inter MBs:
              *  - B slice: when bskip parameters are found the MB becomes BDIRECT
              *    if it has coded residue, BSKIP otherwise.
              *  - other slices: only an MB without coded residue is tested, and it
              *    becomes PSKIP when pskip parameters are found. */
2184         if (!ps_proc->u4_is_intra)
2185         {
2186             if (ps_proc->i4_slice_type == BSLICE)
2187             {
2188                 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2189                 {
2190                     ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2191                 }
2192             }
2193             else if(!ps_proc->u4_cbp)
2194             {
2195                 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2196                 {
2197                     ps_proc->u4_mb_type = PSKIP;
2198                 }
2199             }
2200         }
2201 
     /* target of the PSKIP shortcut taken in the mb analysis block above */
2202 UPDATE_MB_INFO:
2203 
2204         /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2205         ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2206 
2207         /**********************************************************************/
2208         /* if disable deblock level is '0' this implies enable deblocking for */
2209         /* all edges of all macroblocks with out any restrictions             */
2210         /*                                                                    */
2211         /* if disable deblock level is '1' this implies disable deblocking for*/
2212         /* all edges of all macroblocks with out any restrictions             */
2213         /*                                                                    */
2214         /* if disable deblock level is '2' this implies enable deblocking for */
2215         /* all edges of all macroblocks except edges overlapping with slice   */
2216         /* boundaries. This option is not currently supported by the encoder  */
2217         /* hence the slice map should be of no significance to perform debloc */
2218         /* king                                                               */
2219         /**********************************************************************/
2220 
2221         if (ps_proc->u4_compute_recon)
2222         {
2223             /* deblk context */
2224             /* src pointers */
2225             UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2226             UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2227 
2228             /* src indices */
                 /* declared unsigned here although the i4_ prefix and the callee's
                  * parameters are signed (WORD32) */
2229             UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2230             UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2231 
2232             /* compute blocking strength */
2233             if (ps_proc->u4_disable_deblock_level != 1)
2234             {
2235                 ih264e_compute_bs(ps_proc);
2236             }
2237 
2238             /* nmb deblocking and hpel and padding */
                 /* the callee's return value is not examined here */
2239             ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2240                                                   pu1_cur_pic_chroma, i4_mb_x,
2241                                                   i4_mb_y);
2242         }
2243 
2244         /* update the context after for coding next mb */
             /* status values are accumulated with bitwise OR, not returned early */
2245         error_status |= ih264e_update_proc_ctxt(ps_proc);
2246 
2247         /* Once the last row is processed, mark the buffer status appropriately */
             /* presumably ih264e_update_proc_ctxt() advances ps_proc->i4_mb_y so
              * that it equals i4_ht_mbs after the final MB -- confirm */
2248         if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2249         {
2250             /* Pointer to current picture buffer structure */
2251             pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2252 
2253             /* Pointer to current picture's mv buffer structure */
2254             mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2255 
2256             /**********************************************************************/
2257             /* if disable deblock level is '0' this implies enable deblocking for */
2258             /* all edges of all macroblocks with out any restrictions             */
2259             /*                                                                    */
2260             /* if disable deblock level is '1' this implies disable deblocking for*/
2261             /* all edges of all macroblocks with out any restrictions             */
2262             /*                                                                    */
2263             /* if disable deblock level is '2' this implies enable deblocking for */
2264             /* all edges of all macroblocks except edges overlapping with slice   */
2265             /* boundaries. This option is not currently supported by the encoder  */
2266             /* hence the slice map should be of no significance to perform debloc */
2267             /* king                                                               */
2268             /**********************************************************************/
                 /* release the BUF_MGR_CODEC hold on the current mv buffer and on
                  * the current picture buffer */
2269             error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2270 
2271             error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2272 
                 /* recon output enabled: publish this frame's recon descriptor in
                  * the context set chosen by ctxt_sel */
2273             if (ps_codec->s_cfg.u4_enable_recon)
2274             {
2275                 /* pic cnt */
2276                 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2277 
2278                 /* rec buffers */
                     /* the pic_buf_t structure is copied by value */
2279                 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2280 
2281                 /* is last? */
2282                 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2283 
2284                 /* frame time stamp */
2285                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2286                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2287             }
2288 
2289         }
2290     }
2291 
2292     DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2293 
2294     return error_status;
2295 }
2296 
2297 /**
2298 *******************************************************************************
2299 *
2300 * @brief
2301 *  Function to update rc context after encoding
2302 *
2303 * @par   Description
2304 *  This function updates the rate control context after the frame is encoded.
2305 *  Number of bits consumed by the current frame, frame distortion, frame cost,
2306 *  number of intra/inter mb's, ... are passed on to rate control context for
2307 *  updating the rc model.
2308 *
2309 * @param[in] ps_codec
2310 *  Handle to codec context
2311 *
2312 * @param[in] ctxt_sel
2313 *  frame context selector
2314 *
2315 * @param[in] pic_cnt
2316 *  pic count
2317 *
2318 * @returns i4_stuffing_byte
2319 *  number of stuffing bytes (if necessary)
2320 *
2321 * @remarks
2322 *
2323 *******************************************************************************
2324 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2325 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2326 {
2327     /* proc set base idx */
2328     WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2329 
2330     /* proc ctxt */
2331     process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2332 
2333     /* frame qp */
2334     UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2335 
2336     /* cbr rc return status */
2337     WORD32 i4_stuffing_byte = 0;
2338 
2339     /* current frame stats */
2340     frame_info_t s_frame_info;
2341     picture_type_e rc_pic_type;
2342 
2343     /* temp var */
2344     WORD32 i, j;
2345 
2346     /********************************************************************/
2347     /*                            BEGIN INIT                            */
2348     /********************************************************************/
2349 
2350     /* init frame info */
2351     irc_init_frame_info(&s_frame_info);
2352 
2353     /* get frame info */
2354     for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2355     {
2356         /*****************************************************************/
2357         /* One frame can be encoded by max of u4_num_cores threads       */
2358         /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2359         /* u4_num_cores threads                                          */
2360         /*****************************************************************/
2361         for (j = 0; j< MAX_MB_TYPE; j++)
2362         {
2363             s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2364 
2365             s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2366 
2367             s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2368         }
2369 
2370         s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2371 
2372         s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2373 
2374         /*****************************************************************/
2375         /* gather number of residue and header bits consumed by the frame*/
2376         /*****************************************************************/
2377         ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2378     }
2379 
2380     /* get pic type */
2381     switch (ps_codec->pic_type)
2382     {
2383         case PIC_I:
2384         case PIC_IDR:
2385             rc_pic_type = I_PIC;
2386             break;
2387         case PIC_P:
2388             rc_pic_type = P_PIC;
2389             break;
2390         case PIC_B:
2391             rc_pic_type = B_PIC;
2392             break;
2393         default:
2394             assert(0);
2395             break;
2396     }
2397 
2398     /* update rc lib with current frame stats */
2399     i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2400                                           &(s_frame_info),
2401                                           ps_codec->s_rate_control.pps_pd_frm_rate,
2402                                           ps_codec->s_rate_control.pps_time_stamp,
2403                                           ps_codec->s_rate_control.pps_frame_time,
2404                                           (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2405                                           &rc_pic_type,
2406                                           i4_is_first_frm,
2407                                           &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2408                                           u1_frame_qp,
2409                                           &ps_codec->s_rate_control.num_intra_in_prev_frame,
2410                                           &ps_codec->s_rate_control.i4_avg_activity);
2411     return i4_stuffing_byte;
2412 }
2413 
2414 /**
2415 *******************************************************************************
2416 *
2417 * @brief
2418 *  entry point of a spawned encoder thread
2419 *
2420 * @par Description:
2421 *  The encoder thread dequeues a proc/entropy job from the encoder queue and
2422 *  calls necessary routines.
2423 *
2424 * @param[in] pv_proc
2425 *  Process context corresponding to the thread
2426 *
2427 * @returns  error status
2428 *
2429 * @remarks
2430 *
2431 *******************************************************************************
2432 */
ih264e_process_thread(void * pv_proc)2433 WORD32 ih264e_process_thread(void *pv_proc)
2434 {
2435     /* error status */
2436     IH264_ERROR_T ret = IH264_SUCCESS;
2437     WORD32 error_status = IH264_SUCCESS;
2438 
2439     /* proc ctxt */
2440     process_ctxt_t *ps_proc = pv_proc;
2441 
2442     /* codec ctxt */
2443     codec_t *ps_codec = ps_proc->ps_codec;
2444 
2445     /* structure to represent a processing job entry */
2446     job_t s_job;
2447 
2448     /* blocking call : entropy dequeue is non-blocking till all
2449      * the proc jobs are processed */
2450     WORD32 is_blocking = 0;
2451 
2452     /* set affinity */
2453     ithread_set_affinity(ps_proc->i4_id);
2454 
2455     while(1)
2456     {
2457         /* dequeue a job from the entropy queue */
2458         {
2459             int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2460 
2461             /* codec context selector */
2462             WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2463 
2464             volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2465 
2466             /* have the lock */
2467             if (error == 0)
2468             {
2469                 if (*pu4_buf == 0)
2470                 {
2471                     /* no entropy threads are active, try dequeuing a job from the entropy queue */
2472                     ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2473                     if (IH264_SUCCESS == ret)
2474                     {
2475                         *pu4_buf = 1;
2476                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2477                         goto WORKER;
2478                     }
2479                     else if(is_blocking)
2480                     {
2481                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2482                         break;
2483                     }
2484                 }
2485                 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2486             }
2487         }
2488 
2489         /* dequeue a job from the process queue */
2490         ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2491         if (IH264_SUCCESS != ret)
2492         {
2493             if(ps_proc->i4_id)
2494                 break;
2495             else
2496             {
2497                 is_blocking = 1;
2498                 continue;
2499             }
2500         }
2501 
2502 WORKER:
2503         /* choose appropriate proc context based on proc_base_idx */
2504         ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2505 
2506         switch (s_job.i4_cmd)
2507         {
2508             case CMD_PROCESS:
2509                 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2510                 ps_proc->i4_mb_x = s_job.i2_mb_x;
2511                 ps_proc->i4_mb_y = s_job.i2_mb_y;
2512 
2513                 /* init process context */
2514                 ih264e_init_proc_ctxt(ps_proc);
2515 
2516                 /* core code all mbs enlisted under the current job */
2517                 error_status |= ih264e_process(ps_proc);
2518                 break;
2519 
2520             case CMD_ENTROPY:
2521                 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2522                 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2523                 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2524 
2525                 /* init entropy */
2526                 ih264e_init_entropy_ctxt(ps_proc);
2527 
2528                 /* entropy code all mbs enlisted under the current job */
2529                 error_status |= ih264e_entropy(ps_proc);
2530                 break;
2531 
2532             default:
2533                 error_status |= IH264_FAIL;
2534                 break;
2535         }
2536     }
2537 
2538     /* send error code */
2539     ps_proc->i4_error_code = error_status;
2540     return ret;
2541 }
2542