1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /*! 21 ****************************************************************************** 22 * \file ihevce_enc_structs.h 23 * 24 * \brief 25 * This file contains structure definations of Encoder 26 * 27 * \date 28 * 18/09/2012 29 * 30 * \author 31 * Ittiam 32 * 33 ****************************************************************************** 34 */ 35 36 #ifndef _IHEVCE_ENC_STRUCTS_H_ 37 #define _IHEVCE_ENC_STRUCTS_H_ 38 39 /*****************************************************************************/ 40 /* Constant Macros */ 41 /*****************************************************************************/ 42 #define HEVCE_MAX_WIDTH 1920 43 #define HEVCE_MAX_HEIGHT 1088 44 45 #define HEVCE_MIN_WIDTH 64 46 #define HEVCE_MIN_HEIGHT 64 47 48 #define MAX_CTBS_IN_FRAME (HEVCE_MAX_WIDTH * HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE * MIN_CTB_SIZE) 49 #define MAX_NUM_CTB_ROWS_FRM (HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE) 50 51 #define MIN_VERT_PROC_UNIT (8) 52 #define MAX_NUM_VERT_UNITS_FRM (HEVCE_MAX_HEIGHT) / (MIN_VERT_PROC_UNIT) 53 54 #define HEVCE_MAX_REF_PICS 8 55 #define HEVCE_MAX_DPB_PICS (HEVCE_MAX_REF_PICS + 
1) 56 57 #define PAD_HORZ 80 58 #define PAD_VERT 80 59 60 #define DEFAULT_MAX_REFERENCE_PICS 4 61 62 #define BLU_RAY_SUPPORT 231457 63 64 /** @brief max number of parts in minCU : max 4 for NxN */ 65 #define NUM_PU_PARTS 4 66 /** @brief max number of parts in Inter CU */ 67 #define NUM_INTER_PU_PARTS (MAX_NUM_INTER_PARTS) 68 #define SEND_BI_RDOPT 69 #ifdef SEND_BI_RDOPT 70 /** @brief */ 71 #define MAX_INTER_CU_CANDIDATES 4 72 #else 73 /** @brief */ 74 #define MAX_INTER_CU_CANDIDATES 3 75 #endif 76 /** @brief */ 77 #define MAX_INTRA_CU_CANDIDATES 3 78 79 #define MAX_INTRA_CANDIDATES 35 80 81 /** For each resolution & bit-rate instance, one entropy thread is created */ 82 #define NUM_ENTROPY_THREADS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES) 83 84 /* Number of buffers between Decomp and HME layers 1 : Seq mode >1 parallel mode */ 85 #define NUM_BUFS_DECOMP_HME 1 86 87 /** Macro to indicate pre me and L0 ipe stagger in pre enc*/ 88 /** Implies MAX_PRE_ENC_STAGGER - 1 max stagger*/ 89 #define MAX_PRE_ENC_STAGGER (NUM_LAP2_LOOK_AHEAD + 1 + MIN_L1_L0_STAGGER_NON_SEQ) 90 91 #define NUM_ME_ENC_BUFS (MAX_NUM_ENC_LOOP_PARALLEL) 92 93 #define MIN_L0_IPE_ENC_STAGGER 1 94 95 /*stagger between L0 IPE and enc*/ 96 #define MAX_L0_IPE_ENC_STAGGER (NUM_ME_ENC_BUFS + (MIN_L0_IPE_ENC_STAGGER)) 97 98 #define MAX_PRE_ENC_RC_DELAY (MAX_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME) 99 100 #define MIN_PRE_ENC_RC_DELAY (MIN_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME) 101 102 /** @brief number of ctb contexts maintained at frame level b/w encode : entropy */ 103 #define NUM_FRMPROC_ENTCOD_BUFS 1 104 105 /** @brief number of extra recon buffs required for stagger design*/ 106 #define NUM_EXTRA_RECON_BUFS 0 107 108 /** recon picture buffer size need to be increased to support EncLoop Parallelism **/ 109 #define NUM_EXTRA_RECON_BUFS_FOR_ELP 0 110 111 /** @brief maximum number of bytes in 4x4 afetr scanning */ 112 #define MAX_SCAN_COEFFS_BYTES_4x4 (48) 113 114 /** @brief maximum 
number of luma coeffs bytes after scan at CTB level */ 115 #define MAX_LUMA_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB)*4) 116 117 /** @brief maximum number of chroma coeffs bytes after scan at CTB level */ 118 #define MAX_CHRM_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * ((MAX_TU_IN_CTB >> 1)) * 4) 119 120 /** @brief maximum number of coeffs bytes after scan at CTB level */ 121 #define MAX_SCAN_COEFFS_CTB ((MAX_LUMA_COEFFS_CTB) + (MAX_CHRM_COEFFS_CTB)) 122 123 /** @breif PU map CTB buffer buyes for neighbour availibility */ 124 #define MUN_PU_MAP_BYTES_PER_CTB (MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW) 125 126 /** @brief tottal system memory records */ 127 #define TOTAL_SYSTEM_MEM_RECS 120 128 129 /** @brief number of input async command buffers */ 130 #define NUM_AYSNC_CMD_BUFS 4 131 132 /** @brief Comand buffers size */ 133 #define ENC_COMMAND_BUFF_SIZE 512 /* 512 bytes */ 134 135 /** @brief Number of output buffers */ 136 #define NUM_OUTPUT_BUFS 4 137 138 /** @brief Lamda for SATD cost estimation */ 139 #define LAMDA_SATD 1 140 141 /** @brief Maximum number of 1s in u2_sig_coeff_abs_gt1_flags */ 142 #define MAX_GT_ONE 8 143 144 /** MAX num ipntra pred modes */ 145 #define MAX_NUM_IP_MODES 35 146 147 /** Number of best intra modes used for intra mode refinement */ 148 #define NUM_BEST_MODES 3 149 150 /** Maximim number of parallel frame processing threads in pre enocde group */ 151 #define MAX_NUM_FRM_PROC_THRDS_PRE_ENC MAX_NUM_CORES 152 153 /** Maximim number of parallel frame processing threads in encode group */ 154 #define MAX_NUM_FRM_PROC_THRDS_ENC MAX_NUM_CORES 155 156 /** Macro to indicate teh PING_PONG buffers for stagger*/ 157 #define PING_PONG_BUF 2 158 159 /** Max number of layers in Motion estimation 160 * should be greater than or equal to MAX_NUM_LAYERS defined in hme_interface.h 161 */ 162 163 #define MAX_NUM_HME_LAYERS 5 164 /** 165 ****************************************************************************** 166 * @brief Maximum number 
of layers allowed 167 ****************************************************************************** 168 */ 169 #define MAX_NUM_LAYERS 4 170 171 #define NUM_RC_PIC_TYPE 9 172 173 #define MAX_NUM_NODES_CU_TREE (85) 174 175 /* macros to control Dynamic load balance */ 176 #define DYN_LOAD_BAL_UPPER_LIMIT 0.80 177 178 #define DYN_LOAD_BAL_LOWER_LIMIT 0.20 179 180 #define NUM_SUB_GOP_DYN_BAL 1 181 182 #define MIN_NUM_FRMS_DYN_BAL 4 183 184 #define CORES_SRES_OR_MRES 2 185 186 #define HME_HIGH_SAD_BLK_THRESH 35 187 188 /* Enable to compare cabac states of final entropy thread with enc loop states */ 189 #define VERIFY_ENCLOOP_CABAC_STATES 0 190 191 #define MAX_NUM_BLKS_IN_MAX_CU 64 /* max cu size is 64x64 */ 192 193 /*****************************************************************************/ 194 /* Function Macros */ 195 /*****************************************************************************/ 196 197 /*****************************************************************************/ 198 /* Typedefs */ 199 /*****************************************************************************/ 200 typedef void (*pf_iq_it_rec)( 201 WORD16 *pi2_src, 202 WORD16 *pi2_tmp, 203 UWORD8 *pu1_pred, 204 WORD16 *pi2_dequant_coeff, 205 UWORD8 *pu1_dst, 206 WORD32 qp_div, /* qpscaled / 6 */ 207 WORD32 qp_rem, /* qpscaled % 6 */ 208 WORD32 src_strd, 209 WORD32 pred_strd, 210 WORD32 dst_strd, 211 WORD32 zero_cols, 212 WORD32 zero_rows); 213 214 typedef void (*pf_intra_pred)( 215 UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode); 216 217 typedef UWORD32 (*pf_res_trans_luma)( 218 UWORD8 *pu1_src, 219 UWORD8 *pu1_pred, 220 WORD32 *pi4_tmp, 221 WORD16 *pi2_dst, 222 WORD32 src_strd, 223 WORD32 pred_strd, 224 WORD32 dst_strd, 225 CHROMA_PLANE_ID_T e_chroma_plane); 226 227 typedef WORD32 (*pf_quant)( 228 WORD16 *pi2_coeffs, 229 WORD16 *pi2_quant_coeff, 230 WORD16 *pi2_dst, 231 WORD32 qp_div, /* qpscaled / 6 */ 232 WORD32 qp_rem, /* qpscaled % 6 */ 233 
WORD32 q_add, 234 WORD32 src_strd, 235 WORD32 dst_strd, 236 UWORD8 *pu1_csbf_buf, 237 WORD32 csbf_strd, 238 WORD32 *zero_cols, 239 WORD32 *zero_row); 240 241 /*****************************************************************************/ 242 /* Enums */ 243 /*****************************************************************************/ 244 /// supported partition shape 245 typedef enum 246 { 247 SIZE_2Nx2N = 0, ///< symmetric motion partition, 2Nx2N 248 SIZE_2NxN = 1, ///< symmetric motion partition, 2Nx N 249 SIZE_Nx2N = 2, ///< symmetric motion partition, Nx2N 250 SIZE_NxN = 3, ///< symmetric motion partition, Nx N 251 SIZE_2NxnU = 4, ///< asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2) 252 SIZE_2NxnD = 5, ///< asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2) 253 SIZE_nLx2N = 6, ///< asymmetric motion partition, ( N/2)x2N + (3N/2)x2N 254 SIZE_nRx2N = 7 ///< asymmetric motion partition, (3N/2)x2N + ( N/2)x2N 255 } PART_SIZE_E; 256 257 /** @brief Interface level Queues of Encoder */ 258 259 typedef enum 260 { 261 IHEVCE_INPUT_DATA_CTRL_Q = 0, 262 IHEVCE_ENC_INPUT_Q, 263 IHEVCE_INPUT_ASYNCH_CTRL_Q, 264 IHEVCE_OUTPUT_DATA_Q, 265 IHEVCE_OUTPUT_STATUS_Q, 266 IHEVCE_RECON_DATA_Q, // /*que for holding recon buffer */ 267 268 IHEVCE_FRM_PRS_ENT_COD_Q, /*que for holding output buffer of enc_loop |input buffer of entropy */ 269 270 IHEVCE_PRE_ENC_ME_Q, /*que for holding input buffer to ME | output of pre-enc */ 271 272 IHEVCE_ME_ENC_RDOPT_Q, /* que for holding output buffer of ME or input buffer of Enc-RDopt */ 273 274 IHEVCE_L0_IPE_ENC_Q, /* Queue for holding L0 ipe data to enc loop*/ 275 276 /* should be last entry */ 277 IHEVCE_MAX_NUM_QUEUES 278 279 } IHEVCE_Q_DESC_T; 280 281 /*****************************************************************************/ 282 /* Structure */ 283 /*****************************************************************************/ 284 285 /** 286 RC_QP_QSCALE conversion structures 287 **/ 288 typedef struct 289 { 290 WORD16 i2_min_qp; 291 
292 WORD16 i2_max_qp; 293 294 WORD16 i2_min_qscale; 295 296 WORD16 i2_max_qscale; 297 298 WORD32 *pi4_qscale_to_qp; 299 300 WORD32 *pi4_qp_to_qscale_q_factor; 301 302 WORD32 *pi4_qp_to_qscale; 303 304 WORD8 i1_qp_offset; 305 306 } rc_quant_t; 307 308 /** 309 ****************************************************************************** 310 * @brief 4x4 level structure which contains all the parameters 311 * for neighbour prediction puopose 312 ****************************************************************************** 313 */ 314 typedef struct 315 { 316 /** PU motion vectors */ 317 pu_mv_t mv; 318 /** Intra or Inter flag for each partition - 0 or 1 */ 319 UWORD16 b1_intra_flag : 1; 320 /** CU skip flag - 0 or 1 */ 321 UWORD16 b1_skip_flag : 1; 322 /** CU depth in CTB tree (0-3) */ 323 UWORD16 b2_cu_depth : 2; 324 325 /** Y Qp for loop filter */ 326 WORD16 b8_qp : 8; 327 328 /** Luma Intra Mode 0 - 34 */ 329 UWORD16 b6_luma_intra_mode : 6; 330 331 /** Y CBF for BS compute */ 332 UWORD16 b1_y_cbf : 1; 333 /** Pred L0 flag of current 4x4 */ 334 UWORD16 b1_pred_l0_flag : 1; 335 336 /** Pred L0 flag of current 4x4 */ 337 UWORD16 b1_pred_l1_flag : 1; 338 } nbr_4x4_t; 339 340 typedef struct 341 { 342 /** Bottom Left availability flag */ 343 UWORD8 u1_bot_lt_avail; 344 345 /** Left availability flag */ 346 UWORD8 u1_left_avail; 347 348 /** Top availability flag */ 349 UWORD8 u1_top_avail; 350 351 /** Top Right availability flag */ 352 UWORD8 u1_top_rt_avail; 353 354 /** Top Left availability flag */ 355 UWORD8 u1_top_lt_avail; 356 357 } nbr_avail_flags_t; 358 359 typedef struct 360 { 361 /** prev intra flag*/ 362 UWORD8 b1_prev_intra_luma_pred_flag : 1; 363 364 /** mpm_idx */ 365 UWORD8 b2_mpm_idx : 2; 366 367 /** reminder pred mode */ 368 UWORD8 b5_rem_intra_pred_mode : 5; 369 370 } intra_prev_rem_flags_t; 371 372 /** 373 ****************************************************************************** 374 * @brief calc (T+Q+RDOQ) output TU structure; entropy input TU 
structure 375 ****************************************************************************** 376 */ 377 typedef struct 378 { 379 /** base tu structure */ 380 tu_t s_tu; 381 382 /** offset of luma data in ecd buffer */ 383 WORD32 i4_luma_coeff_offset; 384 385 /** offset of cb data in ecd buffer */ 386 WORD32 ai4_cb_coeff_offset[2]; 387 388 /** offset of cr data in ecd buffer */ 389 WORD32 ai4_cr_coeff_offset[2]; 390 391 } tu_enc_loop_out_t; 392 393 typedef struct 394 { 395 /* L0 Motion Vector */ 396 mv_t s_l0_mv; 397 398 /* L1 Motion Vector */ 399 mv_t s_l1_mv; 400 401 /* L0 Ref index */ 402 WORD8 i1_l0_ref_idx; 403 404 /* L1 Ref index */ 405 WORD8 i1_l1_ref_idx; 406 407 /* L0 Ref Pic Buf ID */ 408 WORD8 i1_l0_pic_buf_id; 409 410 /* L1 Ref Pic Buf ID */ 411 WORD8 i1_l1_pic_buf_id; 412 413 /** intra flag */ 414 UWORD8 b1_intra_flag : 1; 415 416 /* Pred mode */ 417 UWORD8 b2_pred_mode : 2; 418 419 /* reserved flag can be used for something later */ 420 UWORD8 u1_reserved; 421 422 } pu_col_mv_t; 423 424 /*****************************************************************************/ 425 /* Encoder uses same structure as pu_t for prediction unit */ 426 /*****************************************************************************/ 427 428 /** 429 ****************************************************************************** 430 * @brief Encode loop (T+Q+RDOQ) output CU structure; entropy input CU structure 431 ****************************************************************************** 432 */ 433 typedef struct 434 { 435 /* CU X position in terms of min CU (8x8) units */ 436 UWORD32 b3_cu_pos_x : 3; 437 438 /* CU Y position in terms of min CU (8x8) units */ 439 UWORD32 b3_cu_pos_y : 3; 440 441 /** CU size in terms of min CU (8x8) units */ 442 UWORD32 b4_cu_size : 4; 443 444 /** transquant bypass flag ; 0 for this encoder */ 445 UWORD32 b1_tq_bypass_flag : 1; 446 447 /** cu skip flag */ 448 UWORD32 b1_skip_flag : 1; 449 450 /** intra / inter CU flag */ 451 UWORD32 
b1_pred_mode_flag : 1; 452 453 /** indicates partition information for CU 454 * For intra 0 : for 2Nx2N / 1 for NxN iff CU=minCBsize 455 * For inter 0 : @sa PART_SIZE_E 456 */ 457 UWORD32 b3_part_mode : 3; 458 459 /** 0 for this encoder */ 460 UWORD32 b1_pcm_flag : 1; 461 462 /** only applicable for intra cu */ 463 UWORD32 b3_chroma_intra_pred_mode : 3; 464 465 /** no residue flag for cu */ 466 UWORD32 b1_no_residual_syntax_flag : 1; 467 468 /* flag to indicate if current CU is the first 469 CU of the Quantisation group*/ 470 UWORD32 b1_first_cu_in_qg : 1; 471 472 /** Intra prev and reminder flags 473 * if part is NxN the tntries 1,2,3 will be valid 474 * other wise only enry 0 will be set. 475 */ 476 intra_prev_rem_flags_t as_prev_rem[NUM_PU_PARTS]; 477 478 /** 479 * Access valid number of pus in this array based on u1_part_mode 480 * Moiton vector differentials and reference idx should be 481 * populated in this structure 482 * @remarks shall be accessed only for inter pus 483 */ 484 pu_t *ps_pu; 485 486 /** 487 * pointer to first tu of this cu. Each TU need to be populated 488 * in TU order by calc. 
Total TUs in CU is given by u2_num_tus_in_cu 489 */ 490 tu_enc_loop_out_t *ps_enc_tu; 491 492 /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */ 493 UWORD16 u2_num_tus_in_cu; 494 495 /** Coeff bufer pointer */ 496 /* Pointer to transform coeff data */ 497 /*************************************************************************/ 498 /* Following format is repeated for every coded TU */ 499 /* Luma Block */ 500 /* num_coeffs : 16 bits */ 501 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 502 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 503 /* coeff_data : Non zero coefficients */ 504 /* Cb Block (only for last TU in 4x4 case else for every luma TU) */ 505 /* num_coeffs : 16 bits */ 506 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 507 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 508 /* coeff_data : Non zero coefficients */ 509 /* Cr Block (only for last TU in 4x4 case else for every luma TU) */ 510 /* num_coeffs : 16 bits */ 511 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 512 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 513 /* coeff_data : Non zero coefficients */ 514 /*************************************************************************/ 515 void *pv_coeff; 516 517 /** qp used during for CU 518 * @remarks : 519 */ 520 WORD8 i1_cu_qp; 521 522 } cu_enc_loop_out_t; 523 524 /** 525 * SAO 526 */ 527 typedef struct 528 { 529 /** 530 * sao_type_idx_luma 531 */ 532 UWORD32 b3_y_type_idx : 3; 533 534 /** 535 * luma sao_band_position 536 */ 537 UWORD32 b5_y_band_pos : 5; 538 539 /** 540 * sao_type_idx_chroma 541 */ 542 UWORD32 b3_cb_type_idx : 3; 543 544 /** 545 * cb sao_band_position 546 */ 547 UWORD32 b5_cb_band_pos : 5; 548 549 /** 550 * sao_type_idx_chroma 551 */ 552 UWORD32 b3_cr_type_idx : 3; 553 554 /** 555 * cb sao_band_position 556 */ 557 UWORD32 b5_cr_band_pos : 5; 558 559 /*SAO Offsets 560 * In all these offsets, 0th element is not used 561 */ 562 
/** 563 * luma SaoOffsetVal[i] 564 */ 565 WORD8 u1_y_offset[5]; 566 567 /** 568 * chroma cb SaoOffsetVal[i] 569 */ 570 WORD8 u1_cb_offset[5]; 571 572 /** 573 * chroma cr SaoOffsetVal[i] 574 */ 575 WORD8 u1_cr_offset[5]; 576 577 /** 578 * sao_merge_left_flag common for y,cb,cr 579 */ 580 UWORD32 b1_sao_merge_left_flag : 1; 581 582 /** 583 * sao_merge_up_flag common for y,cb,cr 584 */ 585 UWORD32 b1_sao_merge_up_flag : 1; 586 587 } sao_enc_t; 588 589 /** 590 ****************************************************************************** 591 * @brief ctb output structure; output of Encode loop, input to entropy 592 ****************************************************************************** 593 */ 594 typedef struct 595 { 596 /** 597 * bit0 : depth0 split flag, (64x64 splits) 598 * bits 1-3 : not used 599 * bits 4-7 : depth1 split flags; valid iff depth0 split=1 (32x32 splits) 600 * bits 8-23: depth2 split flags; (if 0 16x16 is cu else 8x8 min cu) 601 602 * if a split flag of n is set for depth 1, check the following split flags 603 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 604 * 605 */ 606 UWORD32 u4_cu_split_flags; 607 608 /*************************************************************** 609 * For any given CU position CU_posx, CU_posy access 610 * au4_packed_tu_split_flags[(CU_posx >> 5)[(CU_posy >> 5)] 611 * Note : For CTB size smaller than 64x64 only use u4_packed_tu_split_flags[0] 612 ****************************************************************/ 613 614 /** 615 * access bits corresponding to actual CU size till leaf nodes 616 * bit0 : (32x32 TU split flag) 617 * bits 1-3 : not used 618 * bits 4-7 : (16x16 TUsplit flags) 619 * bits 8-23: (8x8 TU split flags) 620 621 * if a split flag of n is set for depth 1, check the following split flags 622 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 623 * 624 * @remarks As tu sizes are relative to CU sizes the producer has to 625 * make sure the correctness of u4_packed_tu_split_flags. 
626 * 627 * @remarks au4_packed_tu_split_flags_cu[1]/[2]/[3] to be used only 628 * for 64x64 ctb. 629 */ 630 UWORD32 au4_packed_tu_split_flags_cu[4]; 631 632 /** 633 * pointer to first CU of CTB. Each CU need to be populated 634 * in CU order by calc. Total CUs in CTB is given by u1_num_cus_in_ctb 635 */ 636 cu_enc_loop_out_t *ps_enc_cu; 637 638 /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */ 639 UWORD8 u1_num_cus_in_ctb; 640 641 /** CTB neighbour availability flags */ 642 nbr_avail_flags_t s_ctb_nbr_avail_flags; 643 644 /* SAO parameters of the CTB */ 645 sao_enc_t s_sao; 646 647 } ctb_enc_loop_out_t; 648 649 /** 650 ****************************************************************************** 651 * @brief cu inter candidate for encoder 652 ****************************************************************************** 653 */ 654 typedef struct 655 { 656 /** base pu structure 657 * access valid number of entries in this array based on u1_part_size 658 */ 659 pu_t as_inter_pu[NUM_INTER_PU_PARTS]; 660 661 /* TU split flag : tu_split_flag[0] represents the transform splits 662 * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds 663 * to respective 32x32 */ 664 /* For a 8x8 TU - 1 bit used to indicate split */ 665 /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */ 666 /* For a 32x32 TU - See above */ 667 WORD32 ai4_tu_split_flag[4]; 668 669 /* TU split flag : tu_split_flag[0] represents the transform splits 670 * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds 671 * to respective 32x32 */ 672 /* For a 8x8 TU - 1 bit used to indicate split */ 673 /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 
4 other bits used to indicate split in each 8x8 quadrant */ 674 /* For a 32x32 TU - See above */ 675 WORD32 ai4_tu_early_cbf[4]; 676 677 /**Pointer to the buffer having predicted data after mc in SATD stage 678 * Since we have 2 buffers for each candidate pred data for best merge candidate 679 * can be in one of the 2 buffers. 680 */ 681 UWORD8 *pu1_pred_data; 682 683 UWORD16 *pu2_pred_data; 684 685 UWORD8 *pu1_pred_data_scr; 686 687 UWORD16 *pu2_pred_data_src; 688 689 /* Total cost: SATD cost + MV cost */ 690 WORD32 i4_total_cost; 691 692 /** Stride for predicted data*/ 693 WORD32 i4_pred_data_stride; 694 695 /** @remarks u1_part_size can be non square only for Inter */ 696 UWORD8 b3_part_size : 3; /* @sa: PART_SIZE_E */ 697 698 /** evaluate transform for cusize iff this flag is 1 */ 699 /** this flag should be set 0 if CU is 64x64 */ 700 UWORD8 b1_eval_tx_cusize : 1; 701 702 /** evaluate transform for cusize/2 iff this flag is 1 */ 703 UWORD8 b1_eval_tx_cusize_by2 : 1; 704 705 /** Skip Flag : ME should always set this 0 for the candidates */ 706 UWORD8 b1_skip_flag : 1; 707 708 UWORD8 b1_intra_has_won : 1; 709 710 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 711 /* if 1, this mode will be evaluated otherwise not.*/ 712 UWORD8 b1_eval_mark : 1; 713 714 } cu_inter_cand_t; 715 716 /** 717 ****************************************************************************** 718 * @brief cu intra candidate for encoder 719 ****************************************************************************** 720 */ 721 typedef struct 722 { 723 UWORD8 au1_intra_luma_mode_nxn_hash[NUM_PU_PARTS][MAX_INTRA_CANDIDATES]; 724 725 /** 726 * List of NxN PU candidates in CU for each partition 727 * valid only of if current cusize = mincusize 728 * +1 to signal the last flag invalid value of 255 needs to be stored 729 */ 730 UWORD8 au1_intra_luma_modes_nxn[NUM_PU_PARTS][(MAX_INTRA_CU_CANDIDATES * (4)) + 2 + 1]; 731 732 /* used to mark if this mode needs to be 
evaluated in auxiliary mode */ 733 /* if 1, this mode will be evaluated otherwise not.*/ 734 UWORD8 au1_nxn_eval_mark[NUM_PU_PARTS][MAX_INTRA_CU_CANDIDATES + 1]; 735 736 /** 737 * List of 2Nx2N PU candidates in CU 738 * +1 to signal the last flag invalid value of 255 needs to be stored 739 */ 740 UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu[MAX_INTRA_CU_CANDIDATES + 1]; 741 742 /** 743 * List of 2Nx2N PU candidates in CU 744 * +1 to signal the last flag invalid value of 255 needs to be stored 745 */ 746 UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[MAX_INTRA_CU_CANDIDATES + 1]; 747 748 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 749 /* if 1, this mode will be evaluated otherwise not.*/ 750 UWORD8 au1_2nx2n_tu_eq_cu_eval_mark[MAX_INTRA_CU_CANDIDATES + 1]; 751 752 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 753 /* if 1, this mode will be evaluated otherwise not.*/ 754 UWORD8 au1_2nx2n_tu_eq_cu_by_2_eval_mark[MAX_INTRA_CU_CANDIDATES + 1]; 755 756 UWORD8 au1_num_modes_added[NUM_PU_PARTS]; 757 758 /** evaluate transform for cusize iff this flag is 1 */ 759 /** this flag should be set 0 if CU is 64x64 */ 760 UWORD8 b1_eval_tx_cusize : 1; 761 762 /** evaluate transform for cusize/2 iff this flag is 1 */ 763 UWORD8 b1_eval_tx_cusize_by2 : 1; 764 765 /** number of intra candidates for SATD evaluation in */ 766 UWORD8 b6_num_intra_cands : 6; 767 768 } cu_intra_cand_t; 769 770 /** 771 ****************************************************************************** 772 * @brief cu structure for mode analysis/evaluation 773 ****************************************************************************** 774 */ 775 typedef struct 776 { 777 /** CU X position in terms of min CU (8x8) units */ 778 UWORD8 b3_cu_pos_x : 3; 779 780 /** CU Y position in terms of min CU (8x8) units */ 781 UWORD8 b3_cu_pos_y : 3; 782 783 /** reserved bytes */ 784 UWORD8 b2_reserved : 2; 785 786 /** CU size 2N (width or height) in pixels */ 787 UWORD8 
u1_cu_size; 788 789 /** Intra CU candidates after FAST CU decision (output of IPE) 790 * 8421 algo along with transform size evalution will 791 * be done for these modes in Encode loop pass. 792 */ 793 cu_intra_cand_t s_cu_intra_cand; 794 795 /** indicates the angular mode (0 - 34) for chroma, 796 * Note : No provision currently to take chroma through RDOPT or SATD 797 */ 798 UWORD8 u1_chroma_intra_pred_mode; 799 800 /** number of inter candidates in as_cu_inter_cand[] 801 * shall be 0 for intra frames. 802 * These inters are evaluated for RDOPT apart from merge/skip candidates 803 */ 804 UWORD8 u1_num_inter_cands; 805 806 /** List of candidates to be evalauted (SATD/RDOPT) for this CU 807 * @remarks : all merge/skip candidates not a part of this list 808 */ 809 cu_inter_cand_t as_cu_inter_cand[MAX_INTER_CU_CANDIDATES]; 810 811 WORD32 ai4_mv_cost[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS]; 812 813 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING 814 WORD32 ai4_err_metric[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS]; 815 #endif 816 817 /* Flag to convey if Inta or Inter is the best candidate among the 818 candidates populated 819 0: If inter is the winner and 1: if Intra is winner*/ 820 UWORD8 u1_best_is_intra; 821 822 /** number of intra rdopt candidates 823 * @remarks : shall be <= u1_num_intra_cands 824 */ 825 UWORD8 u1_num_intra_rdopt_cands; 826 /** qp used during for CU 827 * @remarks : 828 */ 829 WORD8 i1_cu_qp; 830 /** Activity factor used in pre enc thread for deriving the Qp 831 * @remarks : This is in Q format 832 */ 833 WORD32 i4_act_factor[4][2]; 834 835 } cu_analyse_t; 836 837 /** 838 ****************************************************************************** 839 * @brief Structure for CU recursion 840 ****************************************************************************** 841 */ 842 typedef struct cur_ctb_cu_tree_t 843 { 844 /** CU X position in terms of min CU (8x8) units */ 845 UWORD8 b3_cu_pos_x : 3; 846 847 /** CU X position in terms 
of min CU (8x8) units */ 848 UWORD8 b3_cu_pos_y : 3; 849 850 /** reserved bytes */ 851 UWORD8 b2_reserved : 2; 852 853 UWORD8 u1_cu_size; 854 855 UWORD8 u1_intra_eval_enable; 856 857 UWORD8 u1_inter_eval_enable; 858 859 /* Flag that indicates whether to evaluate this node */ 860 /* during RDOPT evaluation. This does not mean that */ 861 /* evaluation of the children need to be abandoned */ 862 UWORD8 is_node_valid; 863 864 LWORD64 i8_best_rdopt_cost; 865 866 struct cur_ctb_cu_tree_t *ps_child_node_tl; 867 868 struct cur_ctb_cu_tree_t *ps_child_node_tr; 869 870 struct cur_ctb_cu_tree_t *ps_child_node_bl; 871 872 struct cur_ctb_cu_tree_t *ps_child_node_br; 873 874 } cur_ctb_cu_tree_t; 875 876 typedef struct 877 { 878 WORD32 num_best_results; 879 880 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 881 882 } block_data_32x32_t; 883 884 /** 885 ****************************************************************************** 886 * @brief Structure for storing data about all the 64x64 887 * block in a 64x64 CTB 888 ****************************************************************************** 889 */ 890 typedef block_data_32x32_t block_data_64x64_t; 891 892 /** 893 ****************************************************************************** 894 * @brief Structure for storing data about all 16 16x16 895 * blocks in a 64x64 CTB and each of their partitions 896 ****************************************************************************** 897 */ 898 typedef struct 899 { 900 WORD32 num_best_results; 901 902 /** 903 * mask of active partitions, Totally 17 bits. 
For a given partition 904 * id, as per PART_ID_T enum the corresponding bit position is 1/0 905 * indicating that partition is active or inactive 906 */ 907 /*WORD32 i4_part_mask;*/ 908 909 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 910 911 } block_data_16x16_t; 912 913 typedef struct 914 { 915 WORD32 num_best_results; 916 917 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 918 } block_data_8x8_t; 919 920 /** 921 ****************************************************************************** 922 * @brief Structure for data export from ME to Enc_Loop 923 ****************************************************************************** 924 */ 925 typedef struct 926 { 927 block_data_8x8_t as_8x8_block_data[64]; 928 929 block_data_16x16_t as_block_data[16]; 930 931 block_data_32x32_t as_32x32_block_data[4]; 932 933 block_data_64x64_t s_64x64_block_data; 934 935 } me_ctb_data_t; 936 937 /** 938 ****************************************************************************** 939 * @brief noise detection related structure 940 * 941 ****************************************************************************** 942 */ 943 944 typedef struct 945 { 946 WORD32 i4_noise_present; 947 948 UWORD8 au1_is_8x8Blk_noisy[MAX_CU_IN_CTB]; 949 950 UWORD32 au4_variance_src_16x16[MAX_CU_IN_CTB]; 951 } ihevce_ctb_noise_params; 952 953 /** 954 ****************************************************************************** 955 * @brief ctb structure for mode analysis/evaluation 956 ****************************************************************************** 957 */ 958 typedef struct 959 { 960 /** 961 * CU decision in a ctb is frozen by ME/IPE and populated in 962 * u4_packed_cu_split_flags. 
963 * @remarks 964 * TODO:review comment 965 * bit0 : 64x64 split flag, (depth0 flag for 64x64 ctb unused for smaller ctb) 966 * bits 1-3 : not used 967 * bits 4-7 : 32x32 split flags; (depth1 flags for 64x64ctb / only bit4 used for 32x32ctb) 968 * bits 8-23: 16x16 split flags; (depth2 flags for 64x64 / depth1[bits8-11] for 32x32 [bit8 for ctb 16x16] ) 969 970 * if a split flag of n is set for depth 1, check the following split flags 971 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 972 * 973 */ 974 UWORD32 u4_cu_split_flags; 975 976 UWORD8 u1_num_cus_in_ctb; 977 978 cur_ctb_cu_tree_t *ps_cu_tree; 979 980 me_ctb_data_t *ps_me_ctb_data; 981 982 ihevce_ctb_noise_params s_ctb_noise_params; 983 984 } ctb_analyse_t; 985 /** 986 ****************************************************************************** 987 * @brief Structures for tapping ssd and bit-estimate information for all CUs 988 ****************************************************************************** 989 */ 990 991 typedef struct 992 { 993 LWORD64 i8_cost; 994 WORD32 i4_idx; 995 } cost_idx_t; 996 997 /** 998 ****************************************************************************** 999 * @brief reference/non reference pic context for encoder 1000 ****************************************************************************** 1001 */ 1002 typedef struct 1003 1004 { 1005 /** 1006 * YUV buffer discriptor for the recon 1007 * Allocation per frame for Y = ((ALIGN(frame width, MAX_CTB_SIZE)) + 2 * PAD_HORZ)* 1008 * ((ALIGN(frame height, MAX_CTB_SIZE)) + 2 * PAD_VERT) 1009 */ 1010 iv_enc_yuv_buf_t s_yuv_buf_desc; 1011 1012 iv_enc_yuv_buf_src_t s_yuv_buf_desc_src; 1013 1014 /* Pointer to Luma (Y) sub plane buffers Horz/ Vert / HV grid */ 1015 /* When (L0ME_IN_OPENLOOP_MODE == 1), additional buffer required to store */ 1016 /* the fullpel plane for use as reference */ 1017 UWORD8 *apu1_y_sub_pel_planes[3 + L0ME_IN_OPENLOOP_MODE]; 1018 1019 /** 1020 * frm level pointer to pu bank for colocated mv access 
1021 * Allocation per frame = (ALIGN(frame width, MAX_CTB_SIZE) / MIN_PU_SIZE) * 1022 * (ALIGN(frame height, MAX_CTB_SIZE) / MIN_PU_SIZE) 1023 */ 1024 pu_col_mv_t *ps_frm_col_mv; 1025 /** 1026 ************************************************************************ 1027 * Pointer to a PU map stored at frame level, 1028 * It contains a 7 bit pu index in encoder order w.r.t to a ctb at a min 1029 * granularirty of MIN_PU_SIZE size. 1030 ************************************************************************ 1031 */ 1032 UWORD8 *pu1_frm_pu_map; 1033 1034 /** CTB level frame buffer to store the accumulated sum of 1035 * number of PUs for every row */ 1036 UWORD16 *pu2_num_pu_map; 1037 1038 /** Offsets in the PU buffer at every CTB level */ 1039 UWORD32 *pu4_pu_off; 1040 1041 /** Collocated POC for reference list 0 1042 * ToDo: Change the array size when multiple slices are to be supported */ 1043 WORD32 ai4_col_l0_poc[HEVCE_MAX_REF_PICS]; 1044 1045 /** Collocated POC for reference list 1 */ 1046 WORD32 ai4_col_l1_poc[HEVCE_MAX_REF_PICS]; 1047 1048 /** 0 = top field, 1 = bottom field */ 1049 WORD32 i4_bottom_field; 1050 1051 /** top field first input in case of interlaced case */ 1052 WORD32 i4_topfield_first; 1053 1054 /** top field first input in case of interlaced case */ 1055 WORD32 i4_poc; 1056 1057 /** unique buffer id */ 1058 WORD32 i4_buf_id; 1059 1060 /** is this reference frame or not */ 1061 WORD32 i4_is_reference; 1062 1063 /** Picture type of current picture */ 1064 WORD32 i4_pic_type; 1065 1066 /** Flag to indicate whether current pictute is free or in use */ 1067 WORD32 i4_is_free; 1068 1069 /** Bit0 - of this Flag to indicate whether current pictute needs to be deblocked, 1070 padded and hpel planes need to be generated. 1071 These are turned off typically in non referecne pictures when psnr 1072 and recon dump is disabled. 1073 1074 Bit1 - of this flag set to 1 if sao is enabled. 
This is to enable deblocking when sao is enabled 1075 */ 1076 WORD32 i4_deblk_pad_hpel_cur_pic; 1077 1078 /** 1079 * weight and offset for this ref pic. To be initialized for every pic 1080 * based on the lap output 1081 */ 1082 ihevce_wght_offst_t s_weight_offset; 1083 1084 /** 1085 * Reciprocal of the lumaweight in q15 format 1086 */ 1087 WORD32 i4_inv_luma_wt; 1088 1089 /** 1090 * Log to base 2 of the common denominator used for luma weights across all ref pics 1091 */ 1092 WORD32 i4_log2_wt_denom; 1093 1094 /** 1095 * Used as Reference for encoding current picture flag 1096 */ 1097 WORD32 i4_used_by_cur_pic_flag; 1098 1099 #if ADAPT_COLOCATED_FROM_L0_FLAG 1100 WORD32 i4_frame_qp; 1101 #endif 1102 /* 1103 * IDR GOP number 1104 */ 1105 1106 WORD32 i4_idr_gop_num; 1107 1108 /* 1109 * non-ref-free_flag 1110 */ 1111 WORD32 i4_non_ref_free_flag; 1112 /** 1113 * Dependency manager instance for ME - Prev recon dep 1114 */ 1115 void *pv_dep_mngr_recon; 1116 1117 /*display num*/ 1118 WORD32 i4_display_num; 1119 } recon_pic_buf_t; 1120 1121 /** 1122 ****************************************************************************** 1123 * @brief Lambda values used for various cost computations 1124 ****************************************************************************** 1125 */ 1126 typedef struct 1127 { 1128 /************************************************************************/ 1129 /* The fields with the string 'type2' in their names are required */ 1130 /* when both 8bit and hbd lambdas are needed. 
The lambdas corresponding */ 1131 /* to the bit_depth != internal_bit_depth are stored in these fields */ 1132 /************************************************************************/ 1133 1134 /** 1135 * Closed loop SSD Lambda 1136 * This is multiplied with bits for RD cost computations in SSD mode 1137 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1138 */ 1139 LWORD64 i8_cl_ssd_lambda_qf; 1140 1141 LWORD64 i8_cl_ssd_type2_lambda_qf; 1142 1143 /** 1144 * Closed loop SSD Lambda for chroma residue (chroma qp is different from luma qp) 1145 * This is multiplied with bits for RD cost computations in SSD mode 1146 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1147 */ 1148 LWORD64 i8_cl_ssd_lambda_chroma_qf; 1149 1150 LWORD64 i8_cl_ssd_type2_lambda_chroma_qf; 1151 1152 /** 1153 * Closed loop SAD Lambda 1154 * This is multiplied with bits for RD cost computations in SAD mode 1155 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1156 */ 1157 WORD32 i4_cl_sad_lambda_qf; 1158 1159 WORD32 i4_cl_sad_type2_lambda_qf; 1160 1161 /** 1162 * Open loop SAD Lambda 1163 * This is multiplied with bits for RD cost computations in SAD mode 1164 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1165 */ 1166 WORD32 i4_ol_sad_lambda_qf; 1167 1168 WORD32 i4_ol_sad_type2_lambda_qf; 1169 1170 /** 1171 * Closed loop SATD Lambda 1172 * This is multiplied with bits for RD cost computations in SATD mode 1173 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1174 */ 1175 WORD32 i4_cl_satd_lambda_qf; 1176 1177 WORD32 i4_cl_satd_type2_lambda_qf; 1178 1179 /** 1180 * Open loop SATD Lambda 1181 * This is multiplied with bits for RD cost computations in SATD mode 1182 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1183 */ 1184 WORD32 i4_ol_satd_lambda_qf; 1185 1186 WORD32 i4_ol_satd_type2_lambda_qf; 1187 1188 double lambda_modifier; 1189 1190 double lambda_uv_modifier; 1191 1192 UWORD32 
u4_chroma_cost_weighing_factor; 1193 1194 } frm_lambda_ctxt_t; 1195 /** 1196 ****************************************************************************** 1197 * @brief Mode attributes for 4x4 block populated by early decision 1198 ****************************************************************************** 1199 */ 1200 typedef struct 1201 { 1202 /* If best mode is present or not */ 1203 UWORD8 mode_present; 1204 1205 /** Best mode for the current 4x4 prediction block */ 1206 UWORD8 best_mode; 1207 1208 /** sad for the best mode for the current 4x4 prediction block */ 1209 UWORD16 sad; 1210 1211 /** cost for the best mode for the current 4x4 prediction block */ 1212 UWORD16 sad_cost; 1213 1214 } ihevce_ed_mode_attr_t; //early decision 1215 1216 /** 1217 ****************************************************************************** 1218 * @brief Structure at 4x4 block level which has parameters about early 1219 * intra or inter decision 1220 ****************************************************************************** 1221 */ 1222 typedef struct 1223 { 1224 /** 1225 * Final parameter of Intra-Inter early decision for the current 4x4. 
1226 * 0 - invalid decision 1227 * 1 - eval intra only 1228 * 2 - eval inter only 1229 * 3 - eval both intra and inter 1230 */ 1231 UWORD8 intra_or_inter; 1232 1233 UWORD8 merge_success; 1234 1235 /** Best mode for the current 4x4 prediction block */ 1236 UWORD8 best_mode; 1237 1238 /** Best mode for the current 4x4 prediction block */ 1239 UWORD8 best_merge_mode; 1240 1241 /** Store SATD at 4*4 level for current layer (L1) */ 1242 WORD32 i4_4x4_satd; 1243 1244 } ihevce_ed_blk_t; //early decision 1245 1246 /* l1 ipe ctb analyze structure */ 1247 /* Contains cu level qp mod related information for all possible cu 1248 sizes (16,32,64 in L0) in a CTB*/ 1249 typedef struct 1250 { 1251 WORD32 i4_sum_4x4_satd[16]; 1252 WORD32 i4_min_4x4_satd[16]; 1253 1254 /* satd for L1_8x8 blocks in L1_32x32 1255 * [16] : num L1_8x8 in L1_32x32 1256 * [2] : 0 - sum of L1_4x4 @ L1_8x8 1257 * - equivalent to transform size of 16x16 @ L0 1258 * 1 - min/median of L1_4x4 @ L1_8x8 1259 * - equivalent to transform size of 8x8 @ L0 1260 */ 1261 WORD32 i4_8x8_satd[16][2]; 1262 1263 /* satd for L1_16x16 blocks in L1_32x32 1264 * [4] : num L1_16x16 in L1_32x32 1265 * [3] : 0 - sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 1266 * - equivalent to transform size of 32x32 @ L0 1267 * 1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 1268 * - equivalent to transform size of 16x16 @ L0 1269 * 2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_16x16 1270 * - equivalent to transform size of 8x8 @ L0 1271 */ 1272 WORD32 i4_16x16_satd[4][3]; 1273 1274 /* Please note that i4_32x32_satd[0][3] contains sum of all 32x32 */ 1275 /* satd for L1_32x32 blocks in L1_32x32 1276 * [1] : num L1_32x32 in L1_32x32 1277 * [4] : 0 - min/median of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32 1278 * - equivalent to transform size of 32x32 @ L0 1279 * 1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_32x32 1280 * - equivalent to transform size of 16x16 @ L0 1281 * 2 - min/median of (min/median of L1_4x4 @ 
L1_8x8) @ L1_32x32 1282 * - equivalent to transform size of 8x8 @ L0 1283 * 3 - sum of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32 1284 */ 1285 WORD32 i4_32x32_satd[1][4]; 1286 1287 /*Store SATD at 8x8 level for current layer (L1)*/ 1288 WORD32 i4_best_satd_8x8[16]; 1289 1290 /* EIID: This will be used for early inter intra decisions */ 1291 /*SAD at 8x8 level for current layer (l1) */ 1292 /*Cost based on sad at 8x8 level for current layer (l1) */ 1293 WORD32 i4_best_sad_cost_8x8_l1_ipe[16]; 1294 1295 WORD32 i4_best_sad_8x8_l1_ipe[16]; 1296 /* SAD at 8x8 level for ME. All other cost are IPE cost */ 1297 WORD32 i4_best_sad_cost_8x8_l1_me[16]; 1298 1299 /* SAD at 8x8 level for ME. for given reference */ 1300 WORD32 i4_sad_cost_me_for_ref[16]; 1301 1302 /* SAD at 8x8 level for ME. for given reference */ 1303 WORD32 i4_sad_me_for_ref[16]; 1304 1305 /* SAD at 8x8 level for ME. All other cost are IPE cost */ 1306 WORD32 i4_best_sad_8x8_l1_me[16]; 1307 1308 WORD32 i4_best_sad_8x8_l1_me_for_decide[16]; 1309 1310 /*Mean @ L0 16x16*/ 1311 WORD32 ai4_16x16_mean[16]; 1312 1313 /*Mean @ L0 32x32*/ 1314 WORD32 ai4_32x32_mean[4]; 1315 1316 /*Mean @ L0 64x64*/ 1317 WORD32 i4_64x64_mean; 1318 1319 } ihevce_ed_ctb_l1_t; //early decision 1320 1321 /** 1322 ****************************************************************************** 1323 * @brief 8x8 Intra analyze structure 1324 ****************************************************************************** 1325 */ 1326 typedef struct 1327 { 1328 /** Best intra modes for 8x8 transform. 
1329 * Insert 255 in the end to limit number of modes 1330 */ 1331 UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1332 1333 /** Best 8x8 intra modes for 4x4 transform 1334 * Insert 255 in the end to limit number of modes 1335 */ 1336 UWORD8 au1_best_modes_4x4_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1337 1338 /** Best 4x4 intra modes 1339 * Insert 255 in the end to limit number of modes 1340 */ 1341 UWORD8 au1_4x4_best_modes[4][MAX_INTRA_CU_CANDIDATES + 1]; 1342 1343 /** flag to indicate if nxn pu mode (different pu at 4x4 level) is enabled */ 1344 UWORD8 b1_enable_nxn : 1; 1345 1346 /** valid cu flag : required for incomplete ctbs at frame boundaries */ 1347 UWORD8 b1_valid_cu : 1; 1348 1349 /** dummy bits */ 1350 UWORD8 b6_reserved : 6; 1351 1352 } intra8_analyse_t; 1353 1354 /** 1355 ****************************************************************************** 1356 * @brief 16x16 Intra analyze structure 1357 ****************************************************************************** 1358 */ 1359 typedef struct 1360 { 1361 /** Best intra modes for 16x16 transform. 
1362 * Insert 255 in the end to limit number of modes 1363 */ 1364 UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1365 1366 /** Best 16x16 intra modes for 8x8 transform 1367 * Insert 255 in the end to limit number of modes 1368 */ 1369 UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1370 1371 /** 8x8 children intra analyze for this 16x16 */ 1372 intra8_analyse_t as_intra8_analyse[4]; 1373 1374 /* indicates if 16x16 is best cu or 8x8 cu */ 1375 UWORD8 b1_split_flag : 1; 1376 1377 /* indicates if 8x8 vs 16x16 rdo evaluation needed */ 1378 /* or only 8x8's rdo evaluation needed */ 1379 UWORD8 b1_merge_flag : 1; 1380 1381 /** 1382 * valid cu flag : required for incomplete ctbs at frame boundaries 1383 * or if CTB size is lower than 32 1384 */ 1385 UWORD8 b1_valid_cu : 1; 1386 1387 /** dummy bits */ 1388 UWORD8 b6_reserved : 5; 1389 1390 } intra16_analyse_t; 1391 1392 /** 1393 ****************************************************************************** 1394 * @brief 32x32 Intra analyze structure 1395 ****************************************************************************** 1396 */ 1397 typedef struct 1398 { 1399 /** Best intra modes for 32x32 transform. 
1400 * Insert 255 in the end to limit number of modes 1401 */ 1402 UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1403 1404 /** Best 32x32 intra modes for 16x16 transform 1405 * Insert 255 in the end to limit number of modes 1406 */ 1407 UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1408 1409 /** 16x16 children intra analyze for this 32x32 */ 1410 intra16_analyse_t as_intra16_analyse[4]; 1411 1412 /* indicates if 32x32 is best cu or 16x16 cu */ 1413 UWORD8 b1_split_flag : 1; 1414 1415 /* indicates if 32x32 vs 16x16 rdo evaluation needed */ 1416 /* or 16x16 vs 8x8 evaluation is needed */ 1417 UWORD8 b1_merge_flag : 1; 1418 1419 /** 1420 * valid cu flag : required for incomplete ctbs at frame boundaries 1421 * or if CTB size is lower than 64 1422 */ 1423 UWORD8 b1_valid_cu : 1; 1424 1425 /** dummy bits */ 1426 UWORD8 b6_reserved : 5; 1427 1428 } intra32_analyse_t; 1429 1430 /** 1431 ****************************************************************************** 1432 * @brief IPE L0 analyze structure for L0 ME to do intra/inter CU decisions 1433 * This is a CTB level structure encapsulating IPE modes, cost at all 1434 * level. IPE also recommemds max intra CU sizes which is required 1435 * by ME for CU size determination in intra dominant CTB 1436 ****************************************************************************** 1437 */ 1438 typedef struct 1439 { 1440 /** Best 64x64 intra modes for 32x32 transform. 
1441 * Insert 255 in the end to limit number of modes 1442 */ 1443 UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1444 1445 /** 32x32 children intra analyze for this 32x32 */ 1446 intra32_analyse_t as_intra32_analyse[4]; 1447 1448 /* indicates if 64x64 is best CUs or 32x32 CUs */ 1449 UWORD8 u1_split_flag; 1450 1451 /* CTB level best 8x8 intra costs */ 1452 WORD32 ai4_best8x8_intra_cost[MAX_CU_IN_CTB]; 1453 1454 /* CTB level best 16x16 intra costs */ 1455 WORD32 ai4_best16x16_intra_cost[MAX_CU_IN_CTB >> 2]; 1456 1457 /* CTB level best 32x32 intra costs */ 1458 WORD32 ai4_best32x32_intra_cost[MAX_CU_IN_CTB >> 4]; 1459 1460 /* best 64x64 intra cost */ 1461 WORD32 i4_best64x64_intra_cost; 1462 1463 /* 1464 @L0 level 1465 4 => 0 - 32x32 TU in 64x64 CU 1466 1 - 16x16 TU in 64x64 CU 1467 2 - 8x8 TU in 64x64 CU 1468 3 - 64x64 CU 1469 2 => Intra/Inter */ 1470 WORD32 i4_64x64_act_factor[4][2]; 1471 1472 /* 1473 @L0 level 1474 4 => num 32x32 in CTB 1475 3 => 0 - 32x32 TU in 64x64 CU 1476 1 - 16x16 TU in 64x64 CU 1477 2 - 8x8 TU in 64x64 CU 1478 2 => Intra/Inter */ 1479 WORD32 i4_32x32_act_factor[4][3][2]; 1480 1481 /* 1482 @L0 level 1483 16 => num 16x16 in CTB 1484 2 => 0 - 16x16 TU in 64x64 CU 1485 1 - 8x8 TU in 64x64 CU 1486 2 => Intra/Inter */ 1487 WORD32 i4_16x16_act_factor[16][2][2]; 1488 1489 WORD32 nodes_created_in_cu_tree; 1490 1491 cur_ctb_cu_tree_t *ps_cu_tree_root; 1492 1493 WORD32 ai4_8x8_act_factor[16]; 1494 WORD32 ai4_best_sad_8x8_l1_me[MAX_CU_IN_CTB]; 1495 WORD32 ai4_best_sad_8x8_l1_ipe[MAX_CU_IN_CTB]; 1496 WORD32 ai4_best_sad_cost_8x8_l1_me[MAX_CU_IN_CTB]; 1497 WORD32 ai4_best_sad_cost_8x8_l1_ipe[MAX_CU_IN_CTB]; 1498 1499 /*Ctb level accumalated satd*/ 1500 WORD32 i4_ctb_acc_satd; 1501 1502 /*Ctb level accumalated mpm bits*/ 1503 WORD32 i4_ctb_acc_mpm_bits; 1504 1505 } ipe_l0_ctb_analyse_for_me_t; 1506 1507 typedef struct 1508 { 1509 WORD16 i2_mv_x; 1510 WORD16 i2_mv_y; 1511 } global_mv_t; 1512 1513 /** 1514 
****************************************************************************** 1515 * @brief Pre Encode pass and ME pass shared variables and buffers 1516 ****************************************************************************** 1517 */ 1518 typedef struct 1519 { 1520 /** 1521 * Buffer id 1522 */ 1523 WORD32 i4_buf_id; 1524 1525 /** 1526 * Flag will be set to 1 by frame processing thread after receiving flush 1527 * command from application 1528 */ 1529 WORD32 i4_end_flag; 1530 1531 /** frame leve ctb analyse buffer pointer */ 1532 ctb_analyse_t *ps_ctb_analyse; 1533 1534 /** frame level cu analyse buffer pointer for IPE */ 1535 //cu_analyse_t *ps_cu_analyse; 1536 1537 /** current input pointer */ 1538 ihevce_lap_enc_buf_t *ps_curr_inp; 1539 1540 /** current inp buffer id */ 1541 WORD32 curr_inp_buf_id; 1542 1543 /** Slice header parameters */ 1544 slice_header_t s_slice_hdr; 1545 1546 /** sps parameters activated by current slice */ 1547 sps_t *ps_sps; 1548 1549 /** pps parameters activated by current slice */ 1550 pps_t *ps_pps; 1551 1552 /** vps parameters activated by current slice */ 1553 vps_t *ps_vps; 1554 /** Pointer to Penultilate Layer context memory internally has MV bank buff and related params */ 1555 void *pv_me_lyr_ctxt; 1556 1557 /** Pointer to Penultilate Layer NV bank context memory */ 1558 void *pv_me_lyr_bnk_ctxt; 1559 1560 /** Pointer to Penultilate Layer MV bank buff */ 1561 void *pv_me_mv_bank; 1562 1563 /** Pointer to Penultilate Layer reference idx buffer */ 1564 void *pv_me_ref_idx; 1565 /** 1566 * Array to store 8x8 cost (partial 8x8 sad + level adjusted cost) 1567 * The order of storing is raster scan order within CTB and 1568 * CTB order is raster scan within frame. 1569 */ 1570 double *plf_intra_8x8_cost; 1571 1572 /** 1573 * L0 layer ctb anaylse frame level buffer. 
1574 * IPE wil populate the cost and best modes at all levels in this buffer 1575 * for every CTB in a frame 1576 */ 1577 // moved to shorter buffer queue 1578 //ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb; 1579 1580 /** Layer L1 buffer pointer */ 1581 ihevce_ed_blk_t *ps_layer1_buf; 1582 1583 /** Layer L2 buffer pointer */ 1584 ihevce_ed_blk_t *ps_layer2_buf; 1585 1586 /*ME reverse map info*/ 1587 UWORD8 *pu1_me_reverse_map_info; 1588 1589 /** Buffer pointer for CTB level information in pre intra pass*/ 1590 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1; 1591 1592 /** vps parameters activated by current slice */ 1593 sei_params_t s_sei; 1594 1595 /** nal_type for the slice to be encoded */ 1596 WORD32 i4_slice_nal_type; 1597 1598 /** input time stamp in terms of ticks: lower 32 */ 1599 WORD32 i4_inp_timestamp_low; 1600 1601 /** input time stamp in terms of ticks: higher 32 */ 1602 WORD32 i4_inp_timestamp_high; 1603 1604 /** input frame ctxt of app to be retured in output buffer */ 1605 void *pv_app_frm_ctxt; 1606 1607 /** current frm valid flag : 1608 * will be 1 if valid input was processed by frame proc thrd 1609 */ 1610 WORD32 i4_frm_proc_valid_flag; 1611 1612 /** 1613 * Qp to be used for current frame 1614 */ 1615 WORD32 i4_curr_frm_qp; 1616 1617 /** 1618 * Frame level Lambda parameters 1619 */ 1620 frm_lambda_ctxt_t as_lambda_prms[IHEVCE_MAX_NUM_BITRATES]; 1621 1622 /** Frame-levelSATDcost accumalator */ 1623 LWORD64 i8_frame_acc_satd_cost; 1624 1625 /** Frame - L1 coarse me cost accumulated */ 1626 LWORD64 i8_acc_frame_coarse_me_cost; 1627 /** Frame - L1 coarse me cost accumulated */ 1628 //LWORD64 i8_acc_frame_coarse_me_cost_for_ref; 1629 1630 /** Frame - L1 coarse me sad accumulated */ 1631 LWORD64 i8_acc_frame_coarse_me_sad; 1632 1633 /* Averge activity of 4x4 blocks from previous frame 1634 * If L1, maps to 8*8 in L0 1635 */ 1636 WORD32 i4_curr_frame_4x4_avg_act; 1637 1638 WORD32 ai4_mod_factor_derived_by_variance[2]; 1639 1640 float f_strength; 1641 1642 /* 
Averge activity of 8x8 blocks from previous frame 1643 * If L1, maps to 16*16 in L0 1644 */ 1645 1646 long double ld_curr_frame_8x8_log_avg[2]; 1647 1648 LWORD64 i8_curr_frame_8x8_avg_act[2]; 1649 1650 LWORD64 i8_curr_frame_8x8_sum_act[2]; 1651 1652 WORD32 i4_curr_frame_8x8_sum_act_for_strength[2]; 1653 1654 ULWORD64 u8_curr_frame_8x8_sum_act_sqr; 1655 1656 WORD32 i4_curr_frame_8x8_num_blks[2]; 1657 1658 LWORD64 i8_acc_frame_8x8_sum_act[2]; 1659 LWORD64 i8_acc_frame_8x8_sum_act_sqr; 1660 WORD32 i4_acc_frame_8x8_num_blks[2]; 1661 LWORD64 i8_acc_frame_8x8_sum_act_for_strength; 1662 LWORD64 i8_curr_frame_8x8_sum_act_for_strength; 1663 1664 /* Averge activity of 16x16 blocks from previous frame 1665 * If L1, maps to 32*32 in L0 1666 */ 1667 1668 long double ld_curr_frame_16x16_log_avg[3]; 1669 1670 LWORD64 i8_curr_frame_16x16_avg_act[3]; 1671 1672 LWORD64 i8_curr_frame_16x16_sum_act[3]; 1673 1674 WORD32 i4_curr_frame_16x16_num_blks[3]; 1675 1676 LWORD64 i8_acc_frame_16x16_sum_act[3]; 1677 WORD32 i4_acc_frame_16x16_num_blks[3]; 1678 1679 /* Averge activity of 32x32 blocks from previous frame 1680 * If L1, maps to 64*64 in L0 1681 */ 1682 1683 long double ld_curr_frame_32x32_log_avg[3]; 1684 1685 LWORD64 i8_curr_frame_32x32_avg_act[3]; 1686 1687 global_mv_t s_global_mv[MAX_NUM_REF]; 1688 LWORD64 i8_curr_frame_32x32_sum_act[3]; 1689 1690 WORD32 i4_curr_frame_32x32_num_blks[3]; 1691 1692 LWORD64 i8_acc_frame_32x32_sum_act[3]; 1693 WORD32 i4_acc_frame_32x32_num_blks[3]; 1694 1695 LWORD64 i8_acc_num_blks_high_sad; 1696 1697 LWORD64 i8_total_blks; 1698 1699 WORD32 i4_complexity_percentage; 1700 1701 WORD32 i4_is_high_complex_region; 1702 1703 WORD32 i4_avg_noise_thrshld_4x4; 1704 1705 LWORD64 i8_curr_frame_mean_sum; 1706 WORD32 i4_curr_frame_mean_num_blks; 1707 LWORD64 i8_curr_frame_avg_mean_act; 1708 1709 } pre_enc_me_ctxt_t; 1710 1711 /** 1712 ****************************************************************************** 1713 * @brief buffers from L0 IPE to ME and enc loop 
1714 ****************************************************************************** 1715 */ 1716 typedef struct 1717 { 1718 WORD32 i4_size; 1719 1720 ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb; 1721 } pre_enc_L0_ipe_encloop_ctxt_t; 1722 /** 1723 ****************************************************************************** 1724 * @brief Frame process and Entropy coding pass shared variables and buffers 1725 ****************************************************************************** 1726 */ 1727 1728 typedef struct 1729 { 1730 /*PIC level Info*/ 1731 ULWORD64 i8_total_cu; 1732 ULWORD64 i8_total_cu_min_8x8; 1733 ULWORD64 i8_total_pu; 1734 ULWORD64 i8_total_intra_cu; 1735 ULWORD64 i8_total_inter_cu; 1736 ULWORD64 i8_total_skip_cu; 1737 ULWORD64 i8_total_cu_based_on_size[4]; 1738 1739 ULWORD64 i8_total_intra_pu; 1740 ULWORD64 i8_total_merge_pu; 1741 ULWORD64 i8_total_non_skipped_inter_pu; 1742 1743 ULWORD64 i8_total_2nx2n_intra_pu[4]; 1744 ULWORD64 i8_total_nxn_intra_pu; 1745 ULWORD64 i8_total_2nx2n_inter_pu[4]; 1746 ULWORD64 i8_total_smp_inter_pu[4]; 1747 ULWORD64 i8_total_amp_inter_pu[3]; 1748 ULWORD64 i8_total_nxn_inter_pu[3]; 1749 1750 ULWORD64 i8_total_L0_mode; 1751 ULWORD64 i8_total_L1_mode; 1752 ULWORD64 i8_total_BI_mode; 1753 1754 ULWORD64 i8_total_L0_ref_idx[MAX_DPB_SIZE]; 1755 ULWORD64 i8_total_L1_ref_idx[MAX_DPB_SIZE]; 1756 1757 ULWORD64 i8_total_tu; 1758 ULWORD64 i8_total_non_coded_tu; 1759 ULWORD64 i8_total_inter_coded_tu; 1760 ULWORD64 i8_total_intra_coded_tu; 1761 1762 ULWORD64 i8_total_tu_based_on_size[4]; 1763 ULWORD64 i8_total_tu_cu64[4]; 1764 ULWORD64 i8_total_tu_cu32[4]; 1765 ULWORD64 i8_total_tu_cu16[3]; 1766 ULWORD64 i8_total_tu_cu8[2]; 1767 1768 LWORD64 i8_total_qp; 1769 LWORD64 i8_total_qp_min_cu; 1770 WORD32 i4_min_qp; 1771 WORD32 i4_max_qp; 1772 LWORD64 i8_sum_squared_frame_qp; 1773 LWORD64 i8_total_frame_qp; 1774 WORD32 i4_max_frame_qp; 1775 float f_total_buffer_underflow; 1776 float f_total_buffer_overflow; 1777 float 
f_max_buffer_underflow; 1778 float f_max_buffer_overflow; 1779 1780 UWORD8 i1_num_ref_idx_l0_active; 1781 UWORD8 i1_num_ref_idx_l1_active; 1782 1783 WORD32 i4_ref_poc_l0[MAX_DPB_SIZE]; 1784 WORD32 i4_ref_poc_l1[MAX_DPB_SIZE]; 1785 1786 WORD8 i1_list_entry_l0[MAX_DPB_SIZE]; 1787 DOUBLE i2_luma_weight_l0[MAX_DPB_SIZE]; 1788 WORD16 i2_luma_offset_l0[MAX_DPB_SIZE]; 1789 WORD8 i1_list_entry_l1[MAX_DPB_SIZE]; 1790 DOUBLE i2_luma_weight_l1[MAX_DPB_SIZE]; 1791 WORD16 i2_luma_offset_l1[MAX_DPB_SIZE]; 1792 1793 ULWORD64 u8_bits_estimated_intra; 1794 ULWORD64 u8_bits_estimated_inter; 1795 ULWORD64 u8_bits_estimated_slice_header; 1796 ULWORD64 u8_bits_estimated_sao; 1797 ULWORD64 u8_bits_estimated_split_cu_flag; 1798 ULWORD64 u8_bits_estimated_cu_hdr_bits; 1799 ULWORD64 u8_bits_estimated_split_tu_flag; 1800 ULWORD64 u8_bits_estimated_qp_delta_bits; 1801 ULWORD64 u8_bits_estimated_cbf_luma_bits; 1802 ULWORD64 u8_bits_estimated_cbf_chroma_bits; 1803 1804 ULWORD64 u8_bits_estimated_res_luma_bits; 1805 ULWORD64 u8_bits_estimated_res_chroma_bits; 1806 1807 ULWORD64 u8_bits_estimated_ref_id; 1808 ULWORD64 u8_bits_estimated_mvd; 1809 ULWORD64 u8_bits_estimated_merge_flag; 1810 ULWORD64 u8_bits_estimated_mpm_luma; 1811 ULWORD64 u8_bits_estimated_mpm_chroma; 1812 1813 ULWORD64 u8_total_bits_generated; 1814 ULWORD64 u8_total_bits_vbv; 1815 1816 ULWORD64 u8_total_I_bits_generated; 1817 ULWORD64 u8_total_P_bits_generated; 1818 ULWORD64 u8_total_B_bits_generated; 1819 1820 UWORD32 u4_frame_sad; 1821 UWORD32 u4_frame_intra_sad; 1822 UWORD32 u4_frame_inter_sad; 1823 1824 ULWORD64 i8_frame_cost; 1825 ULWORD64 i8_frame_intra_cost; 1826 ULWORD64 i8_frame_inter_cost; 1827 } s_pic_level_acc_info_t; 1828 1829 typedef struct 1830 { 1831 UWORD32 u4_target_bit_rate_sei_entropy; 1832 UWORD32 u4_buffer_size_sei_entropy; 1833 UWORD32 u4_dbf_entropy; 1834 1835 } s_pic_level_sei_info_t; 1836 /** 1837 ****************************************************************************** 1838 * @brief ME pass and 
Main enocde pass shared variables and buffers 1839 ****************************************************************************** 1840 */ 1841 typedef struct 1842 { 1843 /** 1844 * Buffer id 1845 */ 1846 WORD32 i4_buf_id; 1847 1848 /** 1849 * Flag will be set to 1 by frame processing thread after receiving flush 1850 * command from application 1851 */ 1852 WORD32 i4_end_flag; 1853 1854 /** current input pointer */ 1855 ihevce_lap_enc_buf_t *ps_curr_inp; 1856 1857 /** current inp buffer id */ 1858 WORD32 curr_inp_buf_id; 1859 1860 /** current input buffers from ME */ 1861 pre_enc_me_ctxt_t *ps_curr_inp_from_me_prms; 1862 1863 /** current inp buffer id from ME */ 1864 WORD32 curr_inp_from_me_buf_id; 1865 1866 /** current input buffers from L0 IPE */ 1867 pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_inp_from_l0_ipe_prms; 1868 1869 /** current inp buffer id from L0 IPE */ 1870 WORD32 curr_inp_from_l0_ipe_buf_id; 1871 1872 /** Slice header parameters */ 1873 slice_header_t s_slice_hdr; 1874 1875 /** current frm valid flag : 1876 * will be 1 if valid input was processed by frame proc thrd 1877 */ 1878 WORD32 i4_frm_proc_valid_flag; 1879 1880 /** 1881 * Array of reference picture list for ping instance 1882 * 2=> ref_pic_list0 and ref_pic_list1 1883 */ 1884 recon_pic_buf_t as_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2]; 1885 1886 /** 1887 * Array of reference picture list 1888 * 2=> ref_pic_list0 and ref_pic_list1 1889 */ 1890 recon_pic_buf_t *aps_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2]; 1891 1892 /** Job Queue Memory encode */ 1893 job_queue_t *ps_job_q_enc; 1894 1895 /** Array of Job Queue handles of enc group for ping and pong instance*/ 1896 job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES]; 1897 1898 /** Array of Job Queue handles of enc group for re-encode*/ 1899 job_queue_handle_t as_job_que_enc_hdls_reenc[NUM_ENC_JOBS_QUES]; 1900 /** frame level me_ctb_data_t buffer pointer 1901 */ 1902 me_ctb_data_t *ps_cur_ctb_me_data; 
1903 1904 /** frame level cur_ctb_cu_tree_t buffer pointer for ME 1905 */ 1906 cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree; 1907 1908 /** Pointer to Dep. Mngr for CTBs processed in every row of a frame. 1909 * ME is producer, EncLoop is the consumer 1910 */ 1911 void *pv_dep_mngr_encloop_dep_me; 1912 1913 } me_enc_rdopt_ctxt_t; 1914 1915 typedef struct 1916 { 1917 UWORD32 u4_payload_type; 1918 UWORD32 u4_payload_length; 1919 UWORD8 *pu1_sei_payload; 1920 } sei_payload_t; 1921 1922 typedef struct 1923 { 1924 /** 1925 * Flag will be set to 1 by frame processing thread after receiving flush 1926 * command from application 1927 */ 1928 WORD32 i4_end_flag; 1929 1930 /** frame level ctb allocation for ctb after aligning to max cu size */ 1931 ctb_enc_loop_out_t *ps_frm_ctb_data; 1932 1933 /** frame level cu allocation for ctb after aligning to max cu size */ 1934 cu_enc_loop_out_t *ps_frm_cu_data; 1935 1936 /** frame level tu allocation for ctb after aligning to max cu size */ 1937 tu_enc_loop_out_t *ps_frm_tu_data; 1938 1939 /** frame level pu allocation for ctb after aligning to max cu size */ 1940 pu_t *ps_frm_pu_data; 1941 1942 /** frame level coeff allocation for ctb after aligning to max cu size */ 1943 void *pv_coeff_data; 1944 1945 /** Slice header parameters */ 1946 slice_header_t s_slice_hdr; 1947 1948 /** sps parameters activated by current slice */ 1949 sps_t *ps_sps; 1950 1951 /** pps parameters activated by current slice */ 1952 pps_t *ps_pps; 1953 1954 /** vps parameters activated by current slice */ 1955 vps_t *ps_vps; 1956 1957 /** vps parameters activated by current slice */ 1958 sei_params_t s_sei; 1959 1960 /* Flag to indicate if AUD NAL is present */ 1961 WORD8 i1_aud_present_flag; 1962 1963 /* Flag to indicate if EOS NAL is present */ 1964 WORD8 i1_eos_present_flag; 1965 1966 /** nal_type for the slice to be encoded */ 1967 WORD32 i4_slice_nal_type; 1968 1969 /** input time stamp in terms of ticks: lower 32 */ 1970 WORD32 i4_inp_timestamp_low; 1971 1972 
/** input time stamp in terms of ticks: higher 32 */ 1973 WORD32 i4_inp_timestamp_high; 1974 1975 /** input frame ctxt of app to be retured in output buffer */ 1976 void *pv_app_frm_ctxt; 1977 1978 /** current frm valid flag : 1979 * will be 1 if valid input was processed by frame proc thrd 1980 */ 1981 WORD32 i4_frm_proc_valid_flag; 1982 1983 /** To support entropy sync the bitstream offset of each CTB row 1984 * is populated in this array any put in slice header in the end 1985 */ 1986 WORD32 ai4_entry_point_offset[MAX_NUM_CTB_ROWS_FRM]; 1987 1988 /** RDopt estimation of bytes generated based on which rc update happens 1989 * 1990 */ 1991 WORD32 i4_rdopt_bits_generated_estimate; 1992 1993 /* These params are passed from enc-threads to entropy thread for 1994 passing params needed for PSNR caclulation and encoding 1995 summary prints */ 1996 DOUBLE lf_luma_mse; 1997 DOUBLE lf_cb_mse; 1998 DOUBLE lf_cr_mse; 1999 2000 DOUBLE lf_luma_ssim; 2001 DOUBLE lf_cb_ssim; 2002 DOUBLE lf_cr_ssim; 2003 2004 WORD32 i4_qp; 2005 WORD32 i4_poc; 2006 WORD32 i4_display_num; 2007 WORD32 i4_pic_type; 2008 2009 /** I-only SCD */ 2010 WORD32 i4_is_I_scenecut; 2011 2012 WORD32 i4_is_non_I_scenecut; 2013 WORD32 i4_sub_pic_level_rc; 2014 2015 WORD32 ai4_frame_bits_estimated; 2016 s_pic_level_acc_info_t s_pic_level_info; 2017 2018 LWORD64 i8_buf_level_bitrate_change; 2019 2020 WORD32 i4_is_end_of_idr_gop; 2021 2022 sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD]; 2023 2024 UWORD32 u4_num_sei_payload; 2025 /* Flag used only in mres single output case to flush out one res and start with next */ 2026 WORD32 i4_out_flush_flag; 2027 2028 } frm_proc_ent_cod_ctxt_t; 2029 2030 /** 2031 ****************************************************************************** 2032 * @brief ME pass and Main enocde pass shared variables and buffers 2033 ****************************************************************************** 2034 */ 2035 typedef struct 2036 { 2037 /*BitRate ID*/ 2038 WORD32 i4_br_id; 
2039 2040 /*Frame ID*/ 2041 WORD32 i4_frm_id; 2042 2043 /*Number of CTB, after ich data is populated*/ 2044 WORD32 i4_ctb_count_in_data; 2045 2046 /*Number of CTB, after ich scale is computed*/ 2047 WORD32 i4_ctb_count_out_scale; 2048 2049 /*Bits estimated for the frame */ 2050 /* For NON-I SCD max buf bits*/ 2051 LWORD64 i8_frame_bits_estimated; 2052 2053 /* Bits consumed till the nctb*/ 2054 LWORD64 i8_nctb_bits_consumed; 2055 2056 /* Bits consumed till the nctb*/ 2057 LWORD64 i8_acc_bits_consumed; 2058 2059 /*Frame level Best of Ipe and ME sad*/ 2060 LWORD64 i8_frame_l1_me_sad; 2061 2062 /*SAD accumalted till NCTB*/ 2063 LWORD64 i8_nctb_l1_me_sad; 2064 2065 /*Frame level IPE sad*/ 2066 LWORD64 i8_frame_l1_ipe_sad; 2067 2068 /*SAD accumalted till NCTB*/ 2069 LWORD64 i8_nctb_l1_ipe_sad; 2070 2071 /*Frame level L0 IPE satd*/ 2072 LWORD64 i8_frame_l0_ipe_satd; 2073 2074 /*L0 SATD accumalted till NCTB*/ 2075 LWORD64 i8_nctb_l0_ipe_satd; 2076 2077 /*Frame level Activity factor acc at 8x8 level */ 2078 LWORD64 i8_frame_l1_activity_fact; 2079 2080 /*NCTB Activity factor acc at 8x8 level */ 2081 LWORD64 i8_nctb_l1_activity_fact; 2082 2083 /*L0 MPM bits accumalted till NCTB*/ 2084 LWORD64 i8_nctb_l0_mpm_bits; 2085 2086 /*Encoder hdr accumalted till NCTB*/ 2087 LWORD64 i8_nctb_hdr_bits_consumed; 2088 2089 } ihevce_sub_pic_rc_ctxt_t; 2090 2091 /** 2092 ****************************************************************************** 2093 * @brief Memoery manager context (stores the memory tables allcoated) 2094 ****************************************************************************** 2095 */ 2096 typedef struct 2097 { 2098 /** 2099 * Total number of memtabs (Modules and system) 2100 * during create time 2101 */ 2102 WORD32 i4_num_create_memtabs; 2103 2104 /** 2105 * Pointer to the mem tabs 2106 * of crate time 2107 */ 2108 iv_mem_rec_t *ps_create_memtab; 2109 2110 /** 2111 * Total number of memtabs Data and control Ques 2112 * during Ques create time 2113 */ 2114 WORD32 
i4_num_q_memtabs; 2115 2116 /** 2117 * Pointer to the mem tabs 2118 * of crate time 2119 */ 2120 iv_mem_rec_t *ps_q_memtab; 2121 2122 } enc_mem_mngr_ctxt; 2123 2124 /** 2125 ****************************************************************************** 2126 * @brief Encoder Interafce Queues Context 2127 ****************************************************************************** 2128 */ 2129 typedef struct 2130 { 2131 /** Number of Queues at interface context level */ 2132 WORD32 i4_num_queues; 2133 2134 /** Array of Queues handle */ 2135 void *apv_q_hdl[IHEVCE_MAX_NUM_QUEUES]; 2136 2137 /** Mutex for encuring thread safety of the access of the queues */ 2138 void *pv_q_mutex_hdl; 2139 2140 } enc_q_ctxt_t; 2141 2142 /** 2143 ****************************************************************************** 2144 * @brief Module context of different modules in encoder 2145 ****************************************************************************** 2146 */ 2147 2148 typedef struct 2149 { 2150 /** Motion estimation context pointer */ 2151 void *pv_me_ctxt; 2152 /** Coarse Motion estimation context pointer */ 2153 void *pv_coarse_me_ctxt; 2154 2155 /** Intra Prediction context pointer */ 2156 void *pv_ipe_ctxt; 2157 2158 /** Encode Loop context pointer */ 2159 void *pv_enc_loop_ctxt; 2160 2161 /** Entropy Coding context pointer */ 2162 void *apv_ent_cod_ctxt[IHEVCE_MAX_NUM_BITRATES]; 2163 2164 /** Look Ahead Processing context pointer */ 2165 void *pv_lap_ctxt; 2166 /** Rate control context pointer */ 2167 void *apv_rc_ctxt[IHEVCE_MAX_NUM_BITRATES]; 2168 /** Decomposition pre intra context pointer */ 2169 void *pv_decomp_pre_intra_ctxt; 2170 2171 } module_ctxt_t; 2172 2173 /** 2174 ****************************************************************************** 2175 * @brief Threads semaphore handles 2176 ****************************************************************************** 2177 */ 2178 typedef struct 2179 { 2180 /** LAP semaphore handle */ 2181 void 
*pv_lap_sem_handle; 2182 2183 /** Encode frame Process semaphore handle */ 2184 void *pv_enc_frm_proc_sem_handle; 2185 2186 /** Pre Encode frame Process semaphore handle */ 2187 void *pv_pre_enc_frm_proc_sem_handle; 2188 2189 /** Entropy coding semaphore handle 2190 One semaphore for each entropy thread, i.e. for each bit-rate instance*/ 2191 void *apv_ent_cod_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2192 2193 /** 2194 * Semaphore handle corresponding to get free inp frame buff 2195 * function call from app if called in blocking mode 2196 */ 2197 void *pv_inp_data_sem_handle; 2198 2199 /** 2200 * Semaphore handle corresponding to get free inp control command buff 2201 * function call from app if called in blocking mode 2202 */ 2203 void *pv_inp_ctrl_sem_handle; 2204 2205 /** 2206 * Semaphore handle corresponding to get filled out bitstream buff 2207 * function call from app if called in blocking mode 2208 */ 2209 void *apv_out_strm_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2210 2211 /** 2212 * Semaphore handle corresponding to get filled out recon buff 2213 * function call from app if called in blocking mode 2214 */ 2215 void *apv_out_recon_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2216 2217 /** 2218 * Semaphore handle corresponding to get filled out control status buff 2219 * function call from app if called in blocking mode 2220 */ 2221 void *pv_out_ctrl_sem_handle; 2222 2223 /** 2224 * Semaphore handle corresponding to get filled out control status buff 2225 * function call from app if called in blocking mode 2226 */ 2227 void *pv_lap_inp_data_sem_hdl; 2228 2229 /** 2230 * Semaphore handle corresponding to get filled out control status buff 2231 * function call from app if called in blocking mode 2232 */ 2233 void *pv_preenc_inp_data_sem_hdl; 2234 2235 /** 2236 * Semaphore handle corresponding to Multi Res Single output case 2237 */ 2238 void *pv_ent_common_mres_sem_hdl; 2239 void *pv_out_common_mres_sem_hdl; 2240 2241 } thrd_que_sem_hdl_t; 2242 2243 /** 2244 
****************************************************************************** 2245 * @brief Frame level structure which has parameters about CTBs 2246 ****************************************************************************** 2247 */ 2248 typedef struct 2249 { 2250 /** CTB size of all CTB in a frame in pixels 2251 * this will be create time value, 2252 * run time change in this value is not supported 2253 */ 2254 WORD32 i4_ctb_size; 2255 2256 /** Minimum CU size of CTB in a frame in pixels 2257 * this will be create time value, 2258 * run time change in this value is not supported 2259 */ 2260 WORD32 i4_min_cu_size; 2261 2262 /** Worst case num CUs in CTB based on i4_ctb_size */ 2263 WORD32 i4_num_cus_in_ctb; 2264 2265 /** Worst case num PUs in CTB based on i4_ctb_size */ 2266 WORD32 i4_num_pus_in_ctb; 2267 2268 /** Worst case num TUs in CTB based on i4_ctb_size */ 2269 WORD32 i4_num_tus_in_ctb; 2270 2271 /** Number of CTBs in horizontal direction 2272 * this is based on run time source width and i4_ctb_size 2273 */ 2274 WORD32 i4_num_ctbs_horz; 2275 2276 /** Number of CTBs in vertical direction 2277 * this is based on run time source height and i4_ctb_size 2278 */ 2279 WORD32 i4_num_ctbs_vert; 2280 2281 /** MAX CUs in horizontal direction 2282 * this is based on run time source width, i4_ctb_size and i4_num_cus_in_ctb 2283 */ 2284 WORD32 i4_max_cus_in_row; 2285 2286 /** MAX PUs in horizontal direction 2287 * this is based on run time source width, i4_ctb_size and i4_num_pus_in_ctb 2288 */ 2289 WORD32 i4_max_pus_in_row; 2290 2291 /** MAX TUs in horizontal direction 2292 * this is based on run time source width, i4_ctb_size and i4_num_tus_in_ctb 2293 */ 2294 WORD32 i4_max_tus_in_row; 2295 2296 /** 2297 * CU aligned picture width (currently aligned to MAX CU size) 2298 * should be modified to be aligned to MIN CU size 2299 */ 2300 2301 WORD32 i4_cu_aligned_pic_wd; 2302 2303 /** 2304 * CU aligned picture height (currently aligned to MAX CU size) 2305 * should be 
modified to be aligned to MIN CU size 2306 */ 2307 2308 WORD32 i4_cu_aligned_pic_ht; 2309 2310 /* Pointer to a frame level memory, 2311 Stride is = 1 + (num ctbs in a ctb-row) + 1 2312 Hieght is = 1 + (num ctbs in a ctb-col) 2313 Contains tile-id of each ctb */ 2314 WORD32 *pi4_tile_id_map; 2315 2316 /* stride in units of ctb */ 2317 WORD32 i4_tile_id_ctb_map_stride; 2318 2319 } frm_ctb_ctxt_t; 2320 2321 /** 2322 ****************************************************************************** 2323 * @brief ME Job Queue desc 2324 ****************************************************************************** 2325 */ 2326 typedef struct 2327 { 2328 /** Number of output dependencies which need to be set after 2329 * current job is complete, 2330 * should be less than or equal to MAX_OUT_DEP defined in 2331 * ihevce_multi_thrd_structs.h 2332 */ 2333 WORD32 i4_num_output_dep; 2334 2335 /** Array of offsets from the start of output dependent layer's Job Ques 2336 * which are dependent on current Job to be complete 2337 */ 2338 WORD32 ai4_out_dep_unit_off[MAX_OUT_DEP]; 2339 2340 /** Number of input dependencies to be resolved for current job to start 2341 * these many jobs in lower layer should be complete to 2342 * start the current JOB 2343 */ 2344 WORD32 i4_num_inp_dep; 2345 2346 } multi_thrd_me_job_q_prms_t; 2347 2348 /** 2349 * @brief structure in which recon data 2350 * and related parameters are sent from Encoder 2351 */ 2352 typedef struct 2353 { 2354 /** Kept for maintaining backwards compatibility in future */ 2355 WORD32 i4_size; 2356 2357 /** Buffer id for the current buffer */ 2358 WORD32 i4_buf_id; 2359 2360 /** POC of the current buffer */ 2361 WORD32 i4_poc; 2362 2363 /** End flag to communicate this is last frame output from encoder */ 2364 WORD32 i4_end_flag; 2365 2366 /** End flag to communicate encoder that this is the last buffer from application 2367 1 - Last buf, 0 - Not last buffer. No other values are supported. 
2368 Application has to set the appropriate value before queing in encoder queue */ 2369 2370 WORD32 i4_is_last_buf; 2371 2372 /** Recon luma buffer pointer */ 2373 void *pv_y_buf; 2374 2375 /** Recon cb buffer pointer */ 2376 void *pv_cb_buf; 2377 2378 /** Recon cr buffer pointer */ 2379 void *pv_cr_buf; 2380 2381 /** Luma size **/ 2382 WORD32 i4_y_pixels; 2383 2384 /** Chroma size **/ 2385 WORD32 i4_uv_pixels; 2386 2387 } iv_enc_recon_data_buffs_t; 2388 2389 /** 2390 ****************************************************************************** 2391 * @brief Multi Thread context structure 2392 ****************************************************************************** 2393 */ 2394 typedef struct 2395 { 2396 /* Flag to indicate to enc and pre-enc thrds that app has sent force end cmd*/ 2397 WORD32 i4_force_end_flag; 2398 2399 /** Force all active threads flag 2400 * This flag will be set to 1 if all Number of cores givento the encoder 2401 * is less than or Equal to MAX_NUM_CORES_SEQ_EXEC. 
In this mode 2402 * All pre enc threads and enc threads will run of the same cores with 2403 * time sharing ar frame level 2404 */ 2405 WORD32 i4_all_thrds_active_flag; 2406 2407 /** Flag to indicate that core manager has been configured to enable 2408 * sequential execution 2409 */ 2410 WORD32 i4_seq_mode_enabled_flag; 2411 /*-----------------------------------------------------------------------*/ 2412 /*--------- Params related to encode group -----------------------------*/ 2413 /*-----------------------------------------------------------------------*/ 2414 2415 /** Number of processing threads created runtime in encode group */ 2416 WORD32 i4_num_enc_proc_thrds; 2417 2418 /** Number of processing threads active for a given frame 2419 * This value will be monitored at frame level, so as to 2420 * have provsion for increasing / decreasing threads 2421 * based on Load balance b/w stage in encoder 2422 */ 2423 WORD32 i4_num_active_enc_thrds; 2424 2425 /** Mutex for ensuring thread safety of the access of Job queues in encode group */ 2426 void *pv_job_q_mutex_hdl_enc_grp_me; 2427 2428 /** Mutex for ensuring thread safety of the access of Job queues in encode group */ 2429 void *pv_job_q_mutex_hdl_enc_grp_enc_loop; 2430 2431 /** Array of Semaphore handles (for each frame processing threads ) */ 2432 void *apv_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC]; 2433 2434 /** Array for ME to export the Job que dependency for all layers */ 2435 multi_thrd_me_job_q_prms_t as_me_job_q_prms[MAX_NUM_HME_LAYERS][MAX_NUM_VERT_UNITS_FRM]; 2436 2437 /* pointer to the mutex handle*/ 2438 void *apv_mutex_handle[MAX_NUM_ME_PARALLEL]; 2439 2440 /* pointer to the mutex handle for frame init*/ 2441 void *apv_mutex_handle_me_end[MAX_NUM_ME_PARALLEL]; 2442 2443 /* pointer to the mutex handle for frame init*/ 2444 void *apv_mutex_handle_frame_init[MAX_NUM_ENC_LOOP_PARALLEL]; 2445 2446 /*pointer to the mutex handle*/ 2447 void *apv_post_enc_mutex_handle[MAX_NUM_ENC_LOOP_PARALLEL]; 2448 
2449 /* Flag to indicate that master has done ME init*/ 2450 WORD32 ai4_me_master_done_flag[MAX_NUM_ME_PARALLEL]; 2451 2452 /* Counter to keep track of me num of thrds exiting critical section*/ 2453 WORD32 me_num_thrds_exited[MAX_NUM_ME_PARALLEL]; 2454 2455 /* Flag to indicate that master has done the frame init*/ 2456 WORD32 enc_master_done_frame_init[MAX_NUM_ENC_LOOP_PARALLEL]; 2457 2458 /* Counter to keep track of num of thrds exiting critical section*/ 2459 WORD32 num_thrds_exited[MAX_NUM_ENC_LOOP_PARALLEL]; 2460 2461 /* Counter to keep track of num of thrds exiting critical section for re-encode*/ 2462 WORD32 num_thrds_exited_for_reenc; 2463 2464 /* Array to store the curr qp for ping and pong instance*/ 2465 WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2466 2467 /* Pointers to store output buffers for ping and pong instance*/ 2468 frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_grp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2469 2470 /* Pointer to store input buffers for me*/ 2471 pre_enc_me_ctxt_t *aps_cur_inp_me_prms[MAX_NUM_ME_PARALLEL]; 2472 2473 /*pointers to store output buffers from me */ 2474 me_enc_rdopt_ctxt_t *aps_cur_out_me_prms[NUM_ME_ENC_BUFS]; 2475 2476 /*pointers to store input buffers to enc-rdopt */ 2477 me_enc_rdopt_ctxt_t *aps_cur_inp_enc_prms[NUM_ME_ENC_BUFS]; 2478 2479 /*Shared memory for Sub Pic rc */ 2480 /*Qscale calulated by sub pic rc bit control for Intra Pic*/ 2481 WORD32 ai4_curr_qp_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2482 2483 /*Header bits error by sub pic rc bit control*/ 2484 float af_acc_hdr_bits_scale_err[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2485 2486 /*Accumalated ME SAD for NCTB*/ 2487 LWORD64 ai8_nctb_me_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2488 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2489 2490 /*Accumalated IPE SAD for NCTB*/ 2491 LWORD64 ai8_nctb_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2492 [MAX_NUM_FRM_PROC_THRDS_ENC]; 
2493 2494 /*Accumalated L0 IPE SAD for NCTB*/ 2495 LWORD64 ai8_nctb_l0_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2496 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2497 2498 /*Accumalated Activity Factor for NCTB*/ 2499 LWORD64 ai8_nctb_act_factor[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2500 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2501 2502 /*Accumalated Ctb counter across all threads*/ 2503 WORD32 ai4_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2504 2505 /*Bits threshold reached for across all threads*/ 2506 WORD32 ai4_threshold_reached[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2507 2508 /*To hold the Previous In-frame RC chunk QP*/ 2509 WORD32 ai4_prev_chunk_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2510 2511 /*Accumalated Ctb counter across all threads*/ 2512 WORD32 ai4_acc_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2513 2514 /*Flag to check if thread is initialized */ 2515 WORD32 ai4_thrd_id_valid_flag[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2516 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2517 2518 /*Accumalated Ctb counter across all threads*/ 2519 //WORD32 ai4_acc_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES][MAX_NUM_FRM_PROC_THRDS_ENC]; 2520 2521 /*Accumalated bits consumed for nctbs across all threads*/ 2522 LWORD64 ai8_nctb_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2523 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2524 2525 /*Accumalated hdr bits consumed for nctbs across all threads*/ 2526 LWORD64 ai8_nctb_hdr_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2527 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2528 2529 /*Accumalated l0 mpm bits consumed for nctbs across all threads*/ 2530 LWORD64 ai8_nctb_mpm_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2531 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2532 2533 /*Accumalated bits consumed for total ctbs across all threads*/ 2534 LWORD64 ai8_acc_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2535 
[MAX_NUM_FRM_PROC_THRDS_ENC]; 2536 2537 /*Accumalated bits consumed for total ctbs across all threads*/ 2538 LWORD64 ai8_acc_bits_mul_qs_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2539 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2540 2541 /*Qscale calulated by sub pic rc bit control */ 2542 WORD32 ai4_curr_qp_acc[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2543 /* End of Sub pic rc variables */ 2544 2545 /* Pointers to store input (only L0 IPE)*/ 2546 pre_enc_L0_ipe_encloop_ctxt_t *aps_cur_L0_ipe_inp_prms[MAX_NUM_ME_PARALLEL]; 2547 2548 /* Array tp store L0 IPE input buf ids*/ 2549 WORD32 ai4_in_frm_l0_ipe_id[MAX_NUM_ME_PARALLEL]; 2550 2551 /* Array to store output buffer ids for ping and pong instances*/ 2552 WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2553 2554 /* Array of pointers to store the recon buf pointers*/ 2555 iv_enc_recon_data_buffs_t *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2556 2557 /* Array of pointers to frame recon for ping and pong instances*/ 2558 recon_pic_buf_t *ps_frm_recon[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES]; 2559 2560 /* Array of recon buffer ids for ping and pong instance*/ 2561 WORD32 recon_buf_id[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES]; 2562 2563 /* Counter to keep track of num thrds done*/ 2564 WORD32 num_thrds_done; 2565 2566 /* Flags to keep track of dumped ping pong recon buffer*/ 2567 WORD32 is_recon_dumped[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2568 2569 /* Flags to keep track of dumped ping pong output buffer*/ 2570 WORD32 is_out_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2571 2572 /* flag to produce output buffer by the thread who ever is finishing 2573 enc-loop processing first, so that the entropy thread can start processing */ 2574 WORD32 ai4_produce_outbuf[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2575 2576 /* Flags to keep track of dumped ping pong input buffer*/ 2577 WORD32 
is_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL]; 2578 2579 /* Flags to keep track of dumped ping pong L0 IPE to enc buffer*/ 2580 WORD32 is_L0_ipe_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL]; 2581 2582 /** Dependency manager for checking whether prev. EncLoop done before 2583 current frame EncLoop starts */ 2584 void *apv_dep_mngr_prev_frame_done[MAX_NUM_ENC_LOOP_PARALLEL]; 2585 2586 /** Dependency manager for checking whether prev. EncLoop done before 2587 re-encode of the current frame */ 2588 void *pv_dep_mngr_prev_frame_enc_done_for_reenc; 2589 2590 /** Dependency manager for checking whether prev. me done before 2591 current frame me starts */ 2592 void *apv_dep_mngr_prev_frame_me_done[MAX_NUM_ME_PARALLEL]; 2593 2594 /** ME coarsest layer JOB queue type */ 2595 WORD32 i4_me_coarsest_lyr_type; 2596 2597 /** number of encloop frames running in parallel */ 2598 WORD32 i4_num_enc_loop_frm_pllel; 2599 2600 /** number of me frames running in parallel */ 2601 WORD32 i4_num_me_frm_pllel; 2602 2603 /*-----------------------------------------------------------------------*/ 2604 /*--------- Params related to pre-enc stage -----------------------------*/ 2605 /*-----------------------------------------------------------------------*/ 2606 2607 /** Number of processing threads created runtime in pre encode group */ 2608 WORD32 i4_num_pre_enc_proc_thrds; 2609 2610 /** Number of processing threads active for a given frame 2611 * This value will be monitored at frame level, so as to 2612 * have provsion for increasing / decreasing threads 2613 * based on Load balance b/w stage in encoder 2614 */ 2615 WORD32 i4_num_active_pre_enc_thrds; 2616 /** number of threads that have done processing the current frame 2617 Use to find out the last thread that is coming out of pre-enc processing 2618 so that the last thread can do de-init of pre-enc stage */ 2619 WORD32 ai4_num_thrds_processed_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2620 2621 /** number of threads that have done 
processing the current frame 2622 Use to find out the first thread and last inoder to get qp query. As the query 2623 is not read only , the quer should be done only once by thread that comes first 2624 and other threads should get same value*/ 2625 WORD32 ai4_num_thrds_processed_L0_ipe_qp_init[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2626 2627 /** number of threads that have done proessing decomp_intra 2628 Used to find out the last thread that is coming out so that 2629 the last thread can set flag for decomp_pre_intra_finish */ 2630 WORD32 ai4_num_thrds_processed_decomp[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2631 2632 /** number of threads that have done proessing coarse_me 2633 Used to find out the last thread that is coming out so that 2634 the last thread can set flag for coarse_me_finish */ 2635 WORD32 ai4_num_thrds_processed_coarse_me[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2636 2637 /*Flag to indicate if current instance (frame)'s Decomp_pre_intra and Coarse_ME is done. 2638 Used to check if previous frame is done proecessing decom_pre_intra and coarse_me */ 2639 WORD32 ai4_decomp_coarse_me_complete_flag[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2640 2641 /** Dependency manager for checking whether prev. frame decomp_intra 2642 done before current frame decomp_intra starts */ 2643 void *pv_dep_mngr_prev_frame_pre_enc_l1; 2644 2645 /** Dependency manager for checking whether prev. frame L0 IPE done before 2646 current frame L0 IPE starts */ 2647 void *pv_dep_mngr_prev_frame_pre_enc_l0; 2648 2649 /** Dependency manager for checking whether prev. 
frame coarse_me done before 2650 current frame coarse_me starts */ 2651 void *pv_dep_mngr_prev_frame_pre_enc_coarse_me; 2652 2653 /** flag to indicate if pre_enc_init is done for current frame */ 2654 WORD32 ai4_pre_enc_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2655 2656 /** flag to indicate if pre_enc_hme_init is done for current frame */ 2657 WORD32 ai4_pre_enc_hme_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2658 2659 /** flag to indicate if pre_enc_deinit is done for current frame */ 2660 WORD32 ai4_pre_enc_deinit_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2661 2662 /** Flag to indicate the end of processing when all the frames are done processing */ 2663 WORD32 ai4_end_flag_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2664 2665 /** Flag to indicate the control blocking mode indicating input command to pre-enc 2666 group should be blocking or unblocking */ 2667 WORD32 i4_ctrl_blocking_mode; 2668 2669 /** Current input pointer */ 2670 ihevce_lap_enc_buf_t *aps_curr_inp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2671 2672 WORD32 i4_last_inp_buf; 2673 2674 /* buffer id for input buffer */ 2675 WORD32 ai4_in_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2676 2677 /** Current output pointer */ 2678 pre_enc_me_ctxt_t *aps_curr_out_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2679 2680 /*Current L0 IPE to enc output pointer */ 2681 pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc; 2682 2683 /** buffer id for output buffer */ 2684 WORD32 ai4_out_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2685 2686 /** buffer id for L0 IPE enc buffer*/ 2687 WORD32 i4_L0_IPE_out_buf_id; 2688 2689 /** Current picture Qp */ 2690 WORD32 ai4_cur_frame_qp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2691 2692 /** Decomp layer buffers indicies */ 2693 WORD32 ai4_decomp_lyr_buf_idx[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2694 2695 /*since it is guranteed that cur frame ipe will not start unless 
prev frame ipe is completly done, 2696 an array of MAX_PRE_ENC_STAGGER might not be required*/ 2697 WORD32 i4_qp_update_l0_ipe; 2698 2699 /** Current picture encoded is the last picture to be encoded flag */ 2700 WORD32 i4_last_pic_flag; 2701 2702 /** Mutex for ensuring thread safety of the access of Job queues in decomp stage */ 2703 void *pv_job_q_mutex_hdl_pre_enc_decomp; 2704 2705 /** Mutex for ensuring thread safety of the access of Job queues in HME group */ 2706 void *pv_job_q_mutex_hdl_pre_enc_hme; 2707 2708 /** Mutex for ensuring thread safety of the access of Job queues in l0 ipe stage */ 2709 void *pv_job_q_mutex_hdl_pre_enc_l0ipe; 2710 2711 /** mutex handle for pre-enc init */ 2712 void *pv_mutex_hdl_pre_enc_init; 2713 2714 /** mutex handle for pre-enc decomp deinit */ 2715 void *pv_mutex_hdl_pre_enc_decomp_deinit; 2716 2717 /** mutex handle for pre enc hme init */ 2718 void *pv_mutex_hdl_pre_enc_hme_init; 2719 2720 /** mutex handle for pre-enc hme deinit */ 2721 void *pv_mutex_hdl_pre_enc_hme_deinit; 2722 2723 /*qp qurey before l0 ipe is done by multiple frame*/ 2724 /** mutex handle for L0 ipe(pre-enc init)*/ 2725 void *pv_mutex_hdl_l0_ipe_init; 2726 2727 /** mutex handle for pre-enc deinit */ 2728 void *pv_mutex_hdl_pre_enc_deinit; 2729 2730 /** Array of Semaphore handles (for each frame processing threads ) */ 2731 void *apv_pre_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC]; 2732 /** array which will tell the number of CTB processed in each row, 2733 * used for Row level sync in IPE pass 2734 */ 2735 WORD32 ai4_ctbs_in_row_proc_ipe_pass[MAX_NUM_CTB_ROWS_FRM]; 2736 2737 /** Job Queue Memory pre encode */ 2738 job_queue_t *aps_job_q_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2739 2740 /** Array of Job Queue handles enc group */ 2741 job_queue_handle_t as_job_que_preenc_hdls[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2742 [NUM_PRE_ENC_JOBS_QUES]; 2743 2744 /* accumulate intra sad across all thread to get qp before L0 IPE*/ 2745 WORD32 
ai4_intra_satd_acc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2746 [MAX_NUM_FRM_PROC_THRDS_PRE_ENC]; 2747 2748 WORD32 i4_delay_pre_me_btw_l0_ipe; 2749 2750 /*** This variable has the maximum delay between hme and l0ipe ***/ 2751 /*** This is used for wrapping around L0IPE index ***/ 2752 WORD32 i4_max_delay_pre_me_btw_l0_ipe; 2753 2754 /* This is to register the handles of Dep Mngr b/w EncLoop and ME */ 2755 /* This is used to delete the Mngr at the end */ 2756 void *apv_dep_mngr_encloop_dep_me[NUM_ME_ENC_BUFS]; 2757 /*flag to track buffer in me/enc que is produced or not*/ 2758 WORD32 ai4_me_enc_buff_prod_flag[NUM_ME_ENC_BUFS]; 2759 2760 /*out buf que id for me */ 2761 WORD32 ai4_me_out_buf_id[NUM_ME_ENC_BUFS]; 2762 2763 /*in buf que id for enc from me*/ 2764 WORD32 i4_enc_in_buf_id[NUM_ME_ENC_BUFS]; 2765 2766 /* This is used to tell whether the free of recon buffers are done or not */ 2767 WORD32 i4_is_recon_free_done; 2768 2769 /* index for DVSR population */ 2770 WORD32 i4_idx_dvsr_p; 2771 WORD32 aai4_l1_pre_intra_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2772 [(HEVCE_MAX_HEIGHT >> 1) / 8]; 2773 2774 WORD32 i4_rc_l0_qp; 2775 2776 /* Used for mres single out cases. Checks whether a particular resolution is active or passive */ 2777 /* Only one resolution should be active for mres_single_out case */ 2778 WORD32 *pi4_active_res_id; 2779 2780 /** 2781 * Sub Pic bit control mutex lock handle 2782 */ 2783 void *pv_sub_pic_rc_mutex_lock_hdl; 2784 2785 void *pv_sub_pic_rc_for_qp_update_mutex_lock_hdl; 2786 2787 WORD32 i4_encode; 2788 WORD32 i4_in_frame_rc_enabled; 2789 WORD32 i4_num_re_enc; 2790 2791 } multi_thrd_ctxt_t; 2792 2793 /** 2794 * @brief Structure to describe tile params 2795 */ 2796 typedef struct 2797 { 2798 /* flag to indicate tile encoding enabled/disabled */ 2799 WORD32 i4_tiles_enabled_flag; 2800 2801 /* flag to indicate unifrom spacing of tiles */ 2802 WORD32 i4_uniform_spacing_flag; 2803 2804 /* num tiles in a tile-row. 
num tiles in tile-col */ 2805 WORD32 i4_num_tile_cols; 2806 WORD32 i4_num_tile_rows; 2807 2808 /* Curr tile width and height*/ 2809 WORD32 i4_curr_tile_width; 2810 WORD32 i4_curr_tile_height; 2811 2812 /* Curr tile width and heignt in CTB units*/ 2813 WORD32 i4_curr_tile_wd_in_ctb_unit; 2814 WORD32 i4_curr_tile_ht_in_ctb_unit; 2815 2816 /* frame resolution */ 2817 //WORD32 i4_frame_width; /* encode-width */ 2818 //WORD32 i4_frame_height; /* encode-height */ 2819 2820 /* total num of tiles "in frame" */ 2821 WORD32 i4_num_tiles; 2822 2823 /* Curr tile id. Assigned by raster scan order in a frame */ 2824 WORD32 i4_curr_tile_id; 2825 2826 /* x-pos of first ctb of the slice in ctb */ 2827 /* y-pos of first ctb of the slice in ctb */ 2828 WORD32 i4_first_ctb_x; 2829 WORD32 i4_first_ctb_y; 2830 2831 /* x-pos of first ctb of the slice in samples */ 2832 /* y-pos of first ctb of the slice in samples */ 2833 WORD32 i4_first_sample_x; 2834 WORD32 i4_first_sample_y; 2835 2836 } ihevce_tile_params_t; 2837 2838 /** 2839 ****************************************************************************** 2840 * @brief Encoder context structure 2841 ****************************************************************************** 2842 */ 2843 2844 typedef struct 2845 { 2846 /** 2847 * vps parameters 2848 */ 2849 vps_t as_vps[IHEVCE_MAX_NUM_BITRATES]; 2850 2851 /** 2852 * sps parameters 2853 */ 2854 sps_t as_sps[IHEVCE_MAX_NUM_BITRATES]; 2855 2856 /** 2857 * pps parameters 2858 * Required for each bitrate separately, mainly because 2859 * init qp etc parameters needs to be different for each instance 2860 */ 2861 pps_t as_pps[IHEVCE_MAX_NUM_BITRATES]; 2862 2863 /** 2864 * Rate control mutex lock handle 2865 */ 2866 void *pv_rc_mutex_lock_hdl; 2867 2868 /** frame level cu analyse buffer pointer for ME 2869 * ME will get ps_ctb_analyse structure populated with ps_cu pointers 2870 * pointing to ps_cu_analyse buffer from IPE. 
2871 */ 2872 //cu_analyse_t *ps_cu_analyse_inter[PING_PONG_BUF]; 2873 2874 /** 2875 * CTB frame context between encoder (producer) and entropy (consumer) 2876 */ 2877 enc_q_ctxt_t s_enc_ques; 2878 2879 /** 2880 * Encoder memory manager ctxt 2881 */ 2882 enc_mem_mngr_ctxt s_mem_mngr; 2883 2884 /** 2885 * Semaphores of all the threads created in HLE 2886 * and Que handle for buffers b/w frame process and entropy 2887 */ 2888 thrd_que_sem_hdl_t s_thrd_sem_ctxt; 2889 2890 /** 2891 * Reference /recon buffer Que pointer 2892 */ 2893 recon_pic_buf_t **pps_recon_buf_q[IHEVCE_MAX_NUM_BITRATES]; 2894 2895 /** 2896 * Number of buffers in Recon buffer queue 2897 */ 2898 WORD32 ai4_num_buf_recon_q[IHEVCE_MAX_NUM_BITRATES]; 2899 2900 /** 2901 * Reference / recon buffer Que pointer for Pre Encode group 2902 * this will be just a container and no buffers will be allcoated 2903 */ 2904 recon_pic_buf_t **pps_pre_enc_recon_buf_q; 2905 2906 /** 2907 * Number of buffers in Recon buffer queue 2908 */ 2909 WORD32 i4_pre_enc_num_buf_recon_q; 2910 2911 /** 2912 * frame level CTB parameters and worst PU CU and TU in a CTB row 2913 */ 2914 frm_ctb_ctxt_t s_frm_ctb_prms; 2915 2916 /* 2917 * Moudle ctxt pointers of all modules 2918 */ 2919 module_ctxt_t s_module_ctxt; 2920 2921 /* 2922 * LAP static parameters 2923 */ 2924 ihevce_lap_static_params_t s_lap_stat_prms; 2925 2926 /* 2927 * Run time dynamic source params 2928 */ 2929 2930 ihevce_src_params_t s_runtime_src_prms; 2931 2932 /* 2933 *Target params 2934 */ 2935 ihevce_tgt_params_t s_runtime_tgt_params; 2936 2937 /* 2938 * Run time dynamic coding params 2939 */ 2940 ihevce_coding_params_t s_runtime_coding_prms; 2941 2942 /** 2943 * Pointer to static config params 2944 */ 2945 ihevce_static_cfg_params_t *ps_stat_prms; 2946 2947 /** 2948 * the following structure members used for copying recon buf info 2949 * in case of duplicate pics 2950 */ 2951 2952 /** 2953 * Array of reference picture list for pre enc group 2954 * Separate list for 
ping_pong instnaces 2955 * 2=> ref_pic_list0 and ref_pic_list1 2956 */ 2957 recon_pic_buf_t as_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2] 2958 [HEVCE_MAX_REF_PICS * 2]; 2959 2960 /** 2961 * Array of reference picture list for pre enc group 2962 * Separate list for ping_pong instnaces 2963 * 2=> ref_pic_list0 and ref_pic_list1 2964 */ 2965 recon_pic_buf_t *aps_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2] 2966 [HEVCE_MAX_REF_PICS * 2]; 2967 2968 /** 2969 * Number of input frames per input queue 2970 */ 2971 WORD32 i4_num_input_buf_per_queue; 2972 2973 /** 2974 * poc of the Clean Random Access(CRA)Ipic 2975 */ 2976 WORD32 i4_cra_poc; 2977 2978 /** Number of ref pics in list 0 for any given frame */ 2979 WORD32 i4_num_ref_l0; 2980 2981 /** Number of ref pics in list 1 for any given frame */ 2982 WORD32 i4_num_ref_l1; 2983 2984 /** Number of active ref pics in list 0 for cur frame */ 2985 WORD32 i4_num_ref_l0_active; 2986 2987 /** Number of active ref pics in list 1 for cur frame */ 2988 WORD32 i4_num_ref_l1_active; 2989 2990 /** Number of ref pics in list 0 for any given frame pre encode stage */ 2991 WORD32 i4_pre_enc_num_ref_l0; 2992 2993 /** Number of ref pics in list 1 for any given frame pre encode stage */ 2994 WORD32 i4_pre_enc_num_ref_l1; 2995 2996 /** Number of active ref pics in list 0 for cur frame pre encode stage */ 2997 WORD32 i4_pre_enc_num_ref_l0_active; 2998 2999 /** Number of active ref pics in list 1 for cur frame pre encode stage */ 3000 WORD32 i4_pre_enc_num_ref_l1_active; 3001 3002 /** 3003 * working mem to be used for frm level activities 3004 * One example is interplation at frame level. This requires memory 3005 * of (max width + 16) * (max_height + 7 + 16 ) * 2 bytes. 
3006 * This is so since we generate interp output for max_width + 16 x 3007 * max_height + 16, and then the intermediate output is 16 bit and 3008 * is max_height + 16 + 7 rows 3009 */ 3010 UWORD8 *pu1_frm_lvl_wkg_mem; 3011 3012 /** 3013 * Multi thread processing context 3014 * This memory contains the variables and pointers shared across threads 3015 * in enc-group and pre-enc-group 3016 */ 3017 multi_thrd_ctxt_t s_multi_thrd; 3018 3019 /** I/O Queues created status */ 3020 WORD32 i4_io_queues_created; 3021 3022 WORD32 i4_end_flag; 3023 3024 /** number of bit-rate instances running */ 3025 WORD32 i4_num_bitrates; 3026 3027 /** number of enc frames running in parallel */ 3028 WORD32 i4_num_enc_loop_frm_pllel; 3029 3030 /*ref bitrate id*/ 3031 WORD32 i4_ref_mbr_id; 3032 3033 /* Flag to indicate app, that end of processing has reached */ 3034 WORD32 i4_frame_limit_reached; 3035 3036 /*Structure to store the function selector 3037 * pointers for common and encoder */ 3038 func_selector_t s_func_selector; 3039 3040 /*ref resolution id*/ 3041 WORD32 i4_resolution_id; 3042 3043 /*hle context*/ 3044 void *pv_hle_ctxt; 3045 3046 rc_quant_t s_rc_quant; 3047 /*ME cost of P pic stored for the next ref B pic*/ 3048 //LWORD64 i8_acc_me_cost_of_p_pic_for_b_pic[2]; 3049 3050 UWORD32 u4_cur_pic_encode_cnt; 3051 UWORD32 u4_cur_pic_encode_cnt_dbp; 3052 /*past 2 p pics high complexity status*/ 3053 WORD32 ai4_is_past_pic_complex[2]; 3054 3055 WORD32 i4_is_I_reset_done; 3056 WORD32 i4_past_RC_reset_count; 3057 3058 WORD32 i4_future_RC_reset; 3059 3060 WORD32 i4_past_RC_scd_reset_count; 3061 3062 WORD32 i4_future_RC_scd_reset; 3063 WORD32 i4_poc_reset_values; 3064 3065 /*Place holder to store the length of LAP in first pass*/ 3066 /** Number of frames to look-ahead for RC by - 3067 * counts 2 fields as one frame for interlaced 3068 */ 3069 WORD32 i4_look_ahead_frames_in_first_pass; 3070 3071 WORD32 ai4_mod_factor_derived_by_variance[2]; 3072 float f_strength; 3073 3074 /*for B frames 
use the avg activity
3075     from the layer 0 (I or P) which is the average over
3076     Lap2 window*/
3077     LWORD64 ai8_lap2_8x8_avg_act_from_T0[2];
3078
3079     LWORD64 ai8_lap2_16x16_avg_act_from_T0[3];
3080
3081     LWORD64 ai8_lap2_32x32_avg_act_from_T0[3];
3082
3083     /*for B frames use the log of avg activity
3084     from the layer 0 (I or P) which is the average over
3085     Lap2 window*/
3086     long double ald_lap2_8x8_log_avg_act_from_T0[2];
3087
3088     long double ald_lap2_16x16_log_avg_act_from_T0[3];
3089
3090     long double ald_lap2_32x32_log_avg_act_from_T0[3];
3091
3092     ihevce_tile_params_t *ps_tile_params_base;
3093
3094     WORD32 ai4_column_width_array[MAX_TILE_COLUMNS];
3095
3096     WORD32 ai4_row_height_array[MAX_TILE_ROWS];
3097
3098     /* Architecture */
3099     IV_ARCH_T e_arch_type;
3100
3101     UWORD8 u1_is_popcnt_available;
3102
3103     WORD32 i4_active_scene_num;
3104
3105     WORD32 i4_max_fr_enc_loop_parallel_rc;
3106     WORD32 ai4_rc_query[IHEVCE_MAX_NUM_BITRATES];
3107     WORD32 i4_active_enc_frame_id;
3108
3109     /**
3110      * LAP interface ctxt pointer
3111      */
3112     void *pv_lap_interface_ctxt;
3113
3114     /* If enable, enables blu ray compatibility of op*/
3115     WORD32 i4_blu_ray_spec;
3116
3117 } enc_ctxt_t;
3118
3119 /**
3120 ******************************************************************************
3121  *  @brief  This struct contains the inter CTB params needed for the decision
3122  *   of the best inter CU results.
      *
      *  It bundles, for one CTB: prediction / input / recon buffer pointers and
      *  strides, the L0 and L1 recon picture lists, the weighted-input pointers
      *  with their inverse weights, and cost-related settings (lambda, SATD/SAD
      *  selection, quality preset).
      *
      *  NOTE(review): ownership and lifetime of the pointed-to buffers are not
      *  visible in this header; confirm against the code that fills this struct.
3123 ******************************************************************************
3124  */
3125 typedef struct
3126 {
         /**
          * Prediction buffer manager, embedded by value.
          * NOTE(review): hme_pred_buf_mngr_t is declared outside this view.
          */
3127     hme_pred_buf_mngr_t s_pred_buf_mngr;
3128
3129     /** X and Y offsets of the CTB w.r.t. the start of the picture */
3130     WORD32 i4_ctb_x_off;
3131     WORD32 i4_ctb_y_off;
3132
3133     /**
3134      * Pred buffer pointer, updated inside the subpel refinement process. This
3135      * location is passed to the leaf function for copying the winner pred
          * buffer.
3136      */
3137     UWORD8 **ppu1_pred;
3138
3139     /** Working memory passed to the leaf functions */
3140     UWORD8 *pu1_wkg_mem;
3141
3142     /** Prediction buffer stride for leaf functions to copy the pred winner buffer */
3143     WORD32 i4_pred_stride;
3144
3145     /** Stride of the input buffer, updated inside the subpel function */
3146     WORD32 i4_inp_stride;
3147
3148     /** Stride of the recon buffer */
3149     WORD32 i4_rec_stride;
3150
3151     /** Indicates whether bi-dir (bi-directional) search is enabled or not */
3152     WORD32 i4_bidir_enabled;
3153
3154     /**
3155      * Total number of references of the current picture being encoded
3156      */
3157     UWORD8 u1_num_ref;
3158
3159     /** Recon picture buffer pointers for the L0 list */
3160     recon_pic_buf_t **pps_rec_list_l0;
3161
3162     /** Recon picture buffer pointers for the L1 list */
3163     recon_pic_buf_t **pps_rec_list_l1;
3164
3165     /**
3166      * These pointers point to the modified (weighted) input, one per ref idx.
3167      * Instead of weighting the reference, the input is weighted with the
3168      * inverse weight and offset for list 0 and list 1.
          * NOTE(review): the first dimension (size 2) presumably selects
          * list 0 / list 1 - confirm against the code that indexes this array.
3169      */
3170     UWORD8 *apu1_wt_inp[2][MAX_NUM_REF];
3171
3172     /* Since ME uses weighted inputs, the reciprocals of the actual weights */
3173     /* signalled in the bitstream are used (inverse weight and its shift)   */
3174     WORD32 *pi4_inv_wt;
3175     WORD32 *pi4_inv_wt_shift_val;
3176
3177     /* Map between L0 reference indices and LC indices */
3178     WORD8 *pi1_past_list;
3179
3180     /* Map between L1 reference indices and LC indices */
3181     WORD8 *pi1_future_list;
3182
3183     /**
3184      * Points to the non-weighted input data for the current CTB
3185      */
3186     UWORD8 *pu1_non_wt_inp;
3187
3188     /**
3189      * Pred lambda and lambda qshift.
          * NOTE(review): the original comment said these are stored "for all
          * the reference indices", but i4_lamda and u1_lamda_qshift are both
          * scalars, so only a single value of each is held here.
3190      */
3191     WORD32 i4_lamda;
3192
         /** Q-shift that goes with i4_lamda */
3193     UWORD8 u1_lamda_qshift;
3194
         /**
          * NOTE(review): undocumented in the original. Presumably log2 of the
          * weighted-prediction weight denominator - confirm.
          */
3195     WORD32 wpred_log_wdc;
3196
3197     /**
3198      * Number of active references in L0
3199      */
3200     UWORD8 u1_num_active_ref_l0;
3201
3202     /**
3203      * Number of active references in L1
3204      */
3205     UWORD8 u1_num_active_ref_l1;
3206
3207     /** The max depth for the inter TU tree */
3208     UWORD8 u1_max_tr_depth;
3209
3210     /** Quality preset */
3211     WORD8 i1_quality_preset;
3212
3213     /**
          * Selects SATD or SAD.
          * NOTE(review): presumably non-zero selects SATD, going by the field
          * name - confirm.
          */
3214     UWORD8 u1_use_satd;
3215
3216     /* Frame level QP.
          * NOTE(review): the field name suggests a quantiser step size scaled
          * by 2^8 rather than the QP value itself - confirm. */
3217     WORD32 i4_qstep_ls8;
3218
3219     /* Pointer to an array of PU level source variances */
3220     UWORD32 *pu4_src_variance;
3221
         /* NOTE(review): undocumented in the original; meaning cannot be derived
          * from this header - confirm with the code that reads this field. */
3222     WORD32 i4_alpha_stim_multiplier;
3223
         /* NOTE(review): undocumented; presumably a flag marking the current CU
          * as noisy - confirm. */
3224     UWORD8 u1_is_cu_noisy;
3225
         /* NOTE(review): undocumented; presumably per-partition sums of the
          * source samples - confirm. */
3226     ULWORD64 *pu8_part_src_sigmaX;
3227
         /* NOTE(review): undocumented; presumably per-partition sums of the
          * squared source samples - confirm. */
3228     ULWORD64 *pu8_part_src_sigmaXSquared;
3229
         /* NOTE(review): undocumented; presumably the maximum number of 2Nx2N
          * candidates taken through TU recursion - confirm. */
3230     UWORD8 u1_max_2nx2n_tu_recur_cands;
3231
3232 } inter_ctb_prms_t;
3233
3234 /*****************************************************************************/
3235 /* Extern Variable Declarations                                              */
3236 /*****************************************************************************/
3237 extern const double lamda_modifier_for_I_pic[8];
3238
3239 /*****************************************************************************/
3240 /* Extern Function Declarations                                              */
3241 /*****************************************************************************/
3242
3243 #endif /* _IHEVCE_ENC_STRUCTS_H_ */ 3244