1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /*! 21 ****************************************************************************** 22 * \file ihevce_enc_structs.h 23 * 24 * \brief 25 * This file contains structure definations of Encoder 26 * 27 * \date 28 * 18/09/2012 29 * 30 * \author 31 * Ittiam 32 * 33 ****************************************************************************** 34 */ 35 36 #ifndef _IHEVCE_ENC_STRUCTS_H_ 37 #define _IHEVCE_ENC_STRUCTS_H_ 38 39 /*****************************************************************************/ 40 /* Constant Macros */ 41 /*****************************************************************************/ 42 #define HEVCE_MAX_WIDTH 1920 43 #define HEVCE_MAX_HEIGHT 1088 44 45 #define HEVCE_MIN_WIDTH 64 46 #define HEVCE_MIN_HEIGHT 64 47 48 #define MAX_CTBS_IN_FRAME (HEVCE_MAX_WIDTH * HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE * MIN_CTB_SIZE) 49 #define MAX_NUM_CTB_ROWS_FRM (HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE) 50 51 #define MIN_VERT_PROC_UNIT (8) 52 #define MAX_NUM_VERT_UNITS_FRM (HEVCE_MAX_HEIGHT) / (MIN_VERT_PROC_UNIT) 53 54 #define HEVCE_MAX_REF_PICS 8 55 #define HEVCE_MAX_DPB_PICS (HEVCE_MAX_REF_PICS + 1) 56 57 #define PAD_HORZ 80 58 #define PAD_VERT 80 59 60 #define DEFAULT_MAX_REFERENCE_PICS 4 61 62 #define BLU_RAY_SUPPORT 231457 63 64 /** @brief max number of parts in minCU : max 4 for NxN */ 65 #define NUM_PU_PARTS 4 66 /** @brief max number of parts in Inter CU */ 67 #define NUM_INTER_PU_PARTS (MAX_NUM_INTER_PARTS) 68 #define SEND_BI_RDOPT 69 #ifdef SEND_BI_RDOPT 70 /** @brief */ 71 #define MAX_INTER_CU_CANDIDATES 4 72 #else 73 /** @brief */ 74 #define MAX_INTER_CU_CANDIDATES 3 75 #endif 76 /** @brief */ 77 #define MAX_INTRA_CU_CANDIDATES 3 78 79 #define MAX_INTRA_CANDIDATES 35 80 81 /** For each resolution & bit-rate instance, one entropy thread is created */ 82 #define NUM_ENTROPY_THREADS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES) 83 84 /* Number of buffers between Decomp and HME layers 1 : Seq mode >1 parallel mode */ 85 #define NUM_BUFS_DECOMP_HME 1 86 87 /** Macro to indicate pre me and L0 ipe stagger in pre enc*/ 88 /** Implies MAX_PRE_ENC_STAGGER - 1 max stagger*/ 89 #define MAX_PRE_ENC_STAGGER (NUM_LAP2_LOOK_AHEAD + 1 + MIN_L1_L0_STAGGER_NON_SEQ) 90 91 #define NUM_ME_ENC_BUFS (MAX_NUM_ENC_LOOP_PARALLEL) 92 93 #define MIN_L0_IPE_ENC_STAGGER 1 94 95 /*stagger between L0 IPE and enc*/ 96 #define MAX_L0_IPE_ENC_STAGGER (NUM_ME_ENC_BUFS + (MIN_L0_IPE_ENC_STAGGER)) 97 98 #define MAX_PRE_ENC_RC_DELAY (MAX_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME) 99 100 #define MIN_PRE_ENC_RC_DELAY (MIN_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME) 101 102 /** @brief number of contexts buffers maintained at frame level b/w pre-encode : encode */ 103 /*Explaination for minus 1: eg: MAX_PRE_ENC_STAGGER = 31 and MAX_L0_IPE_ENC_STAGGER = 5, In this case L1 produce 30 buffer, 104 l0 will start off with 30th buffer and enc will work on 33nd and 34rd frame.*/ 105 /* NUM_BUFS_DECOMP_HME is added to take care of pipeline between Decomp-preintra and HME */ 106 #define MAX_NUM_PREENC_ENC_BUFS \ 107 (MAX_PRE_ENC_STAGGER + MAX_L0_IPE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1) //22//5 108 109 #define MIN_NUM_PREENC_ENC_BUFS \ 110 (MAX_PRE_ENC_STAGGER + MIN_L0_IPE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1) 111 112 /** @brief number of ctb contexts maintained at frame level b/w encode : entropy */ 113 #define NUM_FRMPROC_ENTCOD_BUFS 8 114 115 /** @brief number of extra recon buffs required for stagger design*/ 116 #define NUM_EXTRA_RECON_BUFS 0 117 118 /** recon picture buffer size need to be increased to support EncLoop Parallelism **/ 119 #define NUM_EXTRA_RECON_BUFS_FOR_ELP 0 120 121 /** @brief maximum number of bytes in 4x4 afetr scanning */ 122 #define MAX_SCAN_COEFFS_BYTES_4x4 (48) 123 124 /** @brief maximum number of luma coeffs bytes after scan at CTB level */ 125 #define MAX_LUMA_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB)*4) 126 127 /** @brief maximum number of chroma coeffs bytes after scan at CTB level */ 128 #define MAX_CHRM_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * ((MAX_TU_IN_CTB >> 1)) * 4) 129 130 /** @brief maximum number of coeffs bytes after scan at CTB level */ 131 #define MAX_SCAN_COEFFS_CTB ((MAX_LUMA_COEFFS_CTB) + (MAX_CHRM_COEFFS_CTB)) 132 133 /** @breif PU map CTB buffer buyes for neighbour availibility */ 134 #define MUN_PU_MAP_BYTES_PER_CTB (MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW) 135 136 /** @brief tottal system memory records */ 137 #define TOTAL_SYSTEM_MEM_RECS 120 138 139 /** @brief number of input async command buffers */ 140 #define NUM_AYSNC_CMD_BUFS 4 141 142 /** @brief Comand buffers size */ 143 #define ENC_COMMAND_BUFF_SIZE 512 /* 512 bytes */ 144 145 /** @brief Number of output buffers */ 146 #define NUM_OUTPUT_BUFS 4 147 148 /** @brief Lamda for SATD cost estimation */ 149 #define LAMDA_SATD 1 150 151 /** @brief Maximum number of 1s in u2_sig_coeff_abs_gt1_flags */ 152 #define MAX_GT_ONE 8 153 154 /** MAX num ipntra pred modes */ 155 #define MAX_NUM_IP_MODES 35 156 157 /** Number of best intra modes used for intra mode refinement */ 158 #define NUM_BEST_MODES 3 159 160 /** Maximim number of parallel frame processing threads in pre enocde group */ 161 #define MAX_NUM_FRM_PROC_THRDS_PRE_ENC MAX_NUM_CORES 162 163 /** Maximim number of parallel frame processing threads in encode group */ 164 #define MAX_NUM_FRM_PROC_THRDS_ENC MAX_NUM_CORES 165 166 /** Macro to indicate teh PING_PONG buffers for stagger*/ 167 #define PING_PONG_BUF 2 168 169 /** Max number of layers in Motion estimation 170 * should be greater than or equal to MAX_NUM_LAYERS defined in hme_interface.h 171 */ 172 173 #define MAX_NUM_HME_LAYERS 5 174 /** 175 ****************************************************************************** 176 * @brief Maximum number of layers allowed 177 ****************************************************************************** 178 */ 179 #define MAX_NUM_LAYERS 4 180 181 #define NUM_RC_PIC_TYPE 9 182 183 #define MAX_NUM_NODES_CU_TREE (85) 184 185 /* macros to control Dynamic load balance */ 186 #define DYN_LOAD_BAL_UPPER_LIMIT 0.80 187 188 #define DYN_LOAD_BAL_LOWER_LIMIT 0.20 189 190 #define NUM_SUB_GOP_DYN_BAL 1 191 192 #define MIN_NUM_FRMS_DYN_BAL 4 193 194 #define CORES_SRES_OR_MRES 2 195 196 #define HME_HIGH_SAD_BLK_THRESH 35 197 198 /* Enable to compare cabac states of final entropy thread with enc loop states */ 199 #define VERIFY_ENCLOOP_CABAC_STATES 0 200 201 #define MAX_NUM_BLKS_IN_MAX_CU 64 /* max cu size is 64x64 */ 202 203 /*****************************************************************************/ 204 /* Function Macros */ 205 /*****************************************************************************/ 206 207 /*****************************************************************************/ 208 /* Typedefs */ 209 /*****************************************************************************/ 210 typedef void (*pf_iq_it_rec)( 211 WORD16 *pi2_src, 212 WORD16 *pi2_tmp, 213 UWORD8 *pu1_pred, 214 WORD16 *pi2_dequant_coeff, 215 UWORD8 *pu1_dst, 216 WORD32 qp_div, /* qpscaled / 6 */ 217 WORD32 qp_rem, /* qpscaled % 6 */ 218 WORD32 src_strd, 219 WORD32 pred_strd, 220 WORD32 dst_strd, 221 WORD32 zero_cols, 222 WORD32 zero_rows); 223 224 typedef void (*pf_intra_pred)( 225 UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode); 226 227 typedef UWORD32 (*pf_res_trans_luma)( 228 UWORD8 *pu1_src, 229 UWORD8 *pu1_pred, 230 WORD32 *pi4_tmp, 231 WORD16 *pi2_dst, 232 WORD32 src_strd, 233 WORD32 pred_strd, 234 WORD32 dst_strd_chr_flag); 235 236 typedef WORD32 (*pf_quant)( 237 WORD16 *pi2_coeffs, 238 WORD16 *pi2_quant_coeff, 239 WORD16 *pi2_dst, 240 WORD32 qp_div, /* qpscaled / 6 */ 241 WORD32 qp_rem, /* qpscaled % 6 */ 242 WORD32 q_add, 243 WORD32 src_strd, 244 WORD32 dst_strd, 245 UWORD8 *pu1_csbf_buf, 246 WORD32 csbf_strd, 247 WORD32 *zero_cols, 248 WORD32 *zero_row); 249 250 /*****************************************************************************/ 251 /* Enums */ 252 /*****************************************************************************/ 253 /// supported partition shape 254 typedef enum 255 { 256 SIZE_2Nx2N = 0, ///< symmetric motion partition, 2Nx2N 257 SIZE_2NxN = 1, ///< symmetric motion partition, 2Nx N 258 SIZE_Nx2N = 2, ///< symmetric motion partition, Nx2N 259 SIZE_NxN = 3, ///< symmetric motion partition, Nx N 260 SIZE_2NxnU = 4, ///< asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2) 261 SIZE_2NxnD = 5, ///< asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2) 262 SIZE_nLx2N = 6, ///< asymmetric motion partition, ( N/2)x2N + (3N/2)x2N 263 SIZE_nRx2N = 7 ///< asymmetric motion partition, (3N/2)x2N + ( N/2)x2N 264 } PART_SIZE_E; 265 266 /** @brief Interface level Queues of Encoder */ 267 268 typedef enum 269 { 270 IHEVCE_INPUT_DATA_CTRL_Q = 0, 271 IHEVCE_ENC_INPUT_Q, 272 IHEVCE_INPUT_ASYNCH_CTRL_Q, 273 IHEVCE_OUTPUT_DATA_Q, 274 IHEVCE_OUTPUT_STATUS_Q, 275 IHEVCE_RECON_DATA_Q, // /*que for holding recon buffer */ 276 277 IHEVCE_FRM_PRS_ENT_COD_Q, /*que for holding output buffer of enc_loop |input buffer of entropy */ 278 279 IHEVCE_PRE_ENC_ME_Q, /*que for holding input buffer to ME | output of pre-enc */ 280 281 IHEVCE_ME_ENC_RDOPT_Q, /* que for holding output buffer of ME or input buffer of Enc-RDopt */ 282 283 IHEVCE_L0_IPE_ENC_Q, /* Queue for holding L0 ipe data to enc loop*/ 284 285 /* should be last entry */ 286 IHEVCE_MAX_NUM_QUEUES 287 288 } IHEVCE_Q_DESC_T; 289 290 /*****************************************************************************/ 291 /* Structure */ 292 /*****************************************************************************/ 293 294 /** 295 RC_QP_QSCALE conversion structures 296 **/ 297 typedef struct 298 { 299 WORD16 i2_min_qp; 300 301 WORD16 i2_max_qp; 302 303 WORD16 i2_min_qscale; 304 305 WORD16 i2_max_qscale; 306 307 WORD32 *pi4_qscale_to_qp; 308 309 WORD32 *pi4_qp_to_qscale_q_factor; 310 311 WORD32 *pi4_qp_to_qscale; 312 313 WORD8 i1_qp_offset; 314 315 } rc_quant_t; 316 317 /** 318 ****************************************************************************** 319 * @brief 4x4 level structure which contains all the parameters 320 * for neighbour prediction puopose 321 ****************************************************************************** 322 */ 323 typedef struct 324 { 325 /** PU motion vectors */ 326 pu_mv_t mv; 327 /** Intra or Inter flag for each partition - 0 or 1 */ 328 UWORD16 b1_intra_flag : 1; 329 /** CU skip flag - 0 or 1 */ 330 UWORD16 b1_skip_flag : 1; 331 /** CU depth in CTB tree (0-3) */ 332 UWORD16 b2_cu_depth : 2; 333 334 /** Y Qp for loop filter */ 335 WORD16 b8_qp : 8; 336 337 /** Luma Intra Mode 0 - 34 */ 338 UWORD16 b6_luma_intra_mode : 6; 339 340 /** Y CBF for BS compute */ 341 UWORD16 b1_y_cbf : 1; 342 /** Pred L0 flag of current 4x4 */ 343 UWORD16 b1_pred_l0_flag : 1; 344 345 /** Pred L0 flag of current 4x4 */ 346 UWORD16 b1_pred_l1_flag : 1; 347 } nbr_4x4_t; 348 349 typedef struct 350 { 351 /** Bottom Left availability flag */ 352 UWORD8 u1_bot_lt_avail; 353 354 /** Left availability flag */ 355 UWORD8 u1_left_avail; 356 357 /** Top availability flag */ 358 UWORD8 u1_top_avail; 359 360 /** Top Right availability flag */ 361 UWORD8 u1_top_rt_avail; 362 363 /** Top Left availability flag */ 364 UWORD8 u1_top_lt_avail; 365 366 } nbr_avail_flags_t; 367 368 typedef struct 369 { 370 /** prev intra flag*/ 371 UWORD8 b1_prev_intra_luma_pred_flag : 1; 372 373 /** mpm_idx */ 374 UWORD8 b2_mpm_idx : 2; 375 376 /** reminder pred mode */ 377 UWORD8 b5_rem_intra_pred_mode : 5; 378 379 } intra_prev_rem_flags_t; 380 381 /** 382 ****************************************************************************** 383 * @brief calc (T+Q+RDOQ) output TU structure; entropy input TU structure 384 ****************************************************************************** 385 */ 386 typedef struct 387 { 388 /** base tu structure */ 389 tu_t s_tu; 390 391 /** offset of luma data in ecd buffer */ 392 WORD32 i4_luma_coeff_offset; 393 394 /** offset of cb data in ecd buffer */ 395 WORD32 ai4_cb_coeff_offset[2]; 396 397 /** offset of cr data in ecd buffer */ 398 WORD32 ai4_cr_coeff_offset[2]; 399 400 } tu_enc_loop_out_t; 401 402 typedef struct 403 { 404 /* L0 Motion Vector */ 405 mv_t s_l0_mv; 406 407 /* L1 Motion Vector */ 408 mv_t s_l1_mv; 409 410 /* L0 Ref index */ 411 WORD8 i1_l0_ref_idx; 412 413 /* L1 Ref index */ 414 WORD8 i1_l1_ref_idx; 415 416 /* L0 Ref Pic Buf ID */ 417 WORD8 i1_l0_pic_buf_id; 418 419 /* L1 Ref Pic Buf ID */ 420 WORD8 i1_l1_pic_buf_id; 421 422 /** intra flag */ 423 UWORD8 b1_intra_flag : 1; 424 425 /* Pred mode */ 426 UWORD8 b2_pred_mode : 2; 427 428 /* reserved flag can be used for something later */ 429 UWORD8 u1_reserved; 430 431 } pu_col_mv_t; 432 433 /*****************************************************************************/ 434 /* Encoder uses same structure as pu_t for prediction unit */ 435 /*****************************************************************************/ 436 437 /** 438 ****************************************************************************** 439 * @brief Encode loop (T+Q+RDOQ) output CU structure; entropy input CU structure 440 ****************************************************************************** 441 */ 442 typedef struct 443 { 444 /* CU X position in terms of min CU (8x8) units */ 445 UWORD32 b3_cu_pos_x : 3; 446 447 /* CU Y position in terms of min CU (8x8) units */ 448 UWORD32 b3_cu_pos_y : 3; 449 450 /** CU size in terms of min CU (8x8) units */ 451 UWORD32 b4_cu_size : 4; 452 453 /** transquant bypass flag ; 0 for this encoder */ 454 UWORD32 b1_tq_bypass_flag : 1; 455 456 /** cu skip flag */ 457 UWORD32 b1_skip_flag : 1; 458 459 /** intra / inter CU flag */ 460 UWORD32 b1_pred_mode_flag : 1; 461 462 /** indicates partition information for CU 463 * For intra 0 : for 2Nx2N / 1 for NxN iff CU=minCBsize 464 * For inter 0 : @sa PART_SIZE_E 465 */ 466 UWORD32 b3_part_mode : 3; 467 468 /** 0 for this encoder */ 469 UWORD32 b1_pcm_flag : 1; 470 471 /** only applicable for intra cu */ 472 UWORD32 b3_chroma_intra_pred_mode : 3; 473 474 /** no residue flag for cu */ 475 UWORD32 b1_no_residual_syntax_flag : 1; 476 477 /* flag to indicate if current CU is the first 478 CU of the Quantisation group*/ 479 UWORD32 b1_first_cu_in_qg : 1; 480 481 /** Intra prev and reminder flags 482 * if part is NxN the tntries 1,2,3 will be valid 483 * other wise only enry 0 will be set. 484 */ 485 intra_prev_rem_flags_t as_prev_rem[NUM_PU_PARTS]; 486 487 /** 488 * Access valid number of pus in this array based on u1_part_mode 489 * Moiton vector differentials and reference idx should be 490 * populated in this structure 491 * @remarks shall be accessed only for inter pus 492 */ 493 pu_t *ps_pu; 494 495 /** 496 * pointer to first tu of this cu. Each TU need to be populated 497 * in TU order by calc. Total TUs in CU is given by u2_num_tus_in_cu 498 */ 499 tu_enc_loop_out_t *ps_enc_tu; 500 501 /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */ 502 UWORD16 u2_num_tus_in_cu; 503 504 /** Coeff bufer pointer */ 505 /* Pointer to transform coeff data */ 506 /*************************************************************************/ 507 /* Following format is repeated for every coded TU */ 508 /* Luma Block */ 509 /* num_coeffs : 16 bits */ 510 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 511 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 512 /* coeff_data : Non zero coefficients */ 513 /* Cb Block (only for last TU in 4x4 case else for every luma TU) */ 514 /* num_coeffs : 16 bits */ 515 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 516 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 517 /* coeff_data : Non zero coefficients */ 518 /* Cr Block (only for last TU in 4x4 case else for every luma TU) */ 519 /* num_coeffs : 16 bits */ 520 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 521 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 522 /* coeff_data : Non zero coefficients */ 523 /*************************************************************************/ 524 void *pv_coeff; 525 526 /** qp used during for CU 527 * @remarks : 528 */ 529 WORD8 i1_cu_qp; 530 531 } cu_enc_loop_out_t; 532 533 /** 534 * SAO 535 */ 536 typedef struct 537 { 538 /** 539 * sao_type_idx_luma 540 */ 541 UWORD32 b3_y_type_idx : 3; 542 543 /** 544 * luma sao_band_position 545 */ 546 UWORD32 b5_y_band_pos : 5; 547 548 /** 549 * sao_type_idx_chroma 550 */ 551 UWORD32 b3_cb_type_idx : 3; 552 553 /** 554 * cb sao_band_position 555 */ 556 UWORD32 b5_cb_band_pos : 5; 557 558 /** 559 * sao_type_idx_chroma 560 */ 561 UWORD32 b3_cr_type_idx : 3; 562 563 /** 564 * cb sao_band_position 565 */ 566 UWORD32 b5_cr_band_pos : 5; 567 568 /*SAO Offsets 569 * In all these offsets, 0th element is not used 570 */ 571 /** 572 * luma SaoOffsetVal[i] 573 */ 574 WORD8 u1_y_offset[5]; 575 576 /** 577 * chroma cb SaoOffsetVal[i] 578 */ 579 WORD8 u1_cb_offset[5]; 580 581 /** 582 * chroma cr SaoOffsetVal[i] 583 */ 584 WORD8 u1_cr_offset[5]; 585 586 /** 587 * sao_merge_left_flag common for y,cb,cr 588 */ 589 UWORD32 b1_sao_merge_left_flag : 1; 590 591 /** 592 * sao_merge_up_flag common for y,cb,cr 593 */ 594 UWORD32 b1_sao_merge_up_flag : 1; 595 596 } sao_enc_t; 597 598 /** 599 ****************************************************************************** 600 * @brief ctb output structure; output of Encode loop, input to entropy 601 ****************************************************************************** 602 */ 603 typedef struct 604 { 605 /** 606 * bit0 : depth0 split flag, (64x64 splits) 607 * bits 1-3 : not used 608 * bits 4-7 : depth1 split flags; valid iff depth0 split=1 (32x32 splits) 609 * bits 8-23: depth2 split flags; (if 0 16x16 is cu else 8x8 min cu) 610 611 * if a split flag of n is set for depth 1, check the following split flags 612 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 613 * 614 */ 615 UWORD32 u4_cu_split_flags; 616 617 /*************************************************************** 618 * For any given CU position CU_posx, CU_posy access 619 * au4_packed_tu_split_flags[(CU_posx >> 5)[(CU_posy >> 5)] 620 * Note : For CTB size smaller than 64x64 only use u4_packed_tu_split_flags[0] 621 ****************************************************************/ 622 623 /** 624 * access bits corresponding to actual CU size till leaf nodes 625 * bit0 : (32x32 TU split flag) 626 * bits 1-3 : not used 627 * bits 4-7 : (16x16 TUsplit flags) 628 * bits 8-23: (8x8 TU split flags) 629 630 * if a split flag of n is set for depth 1, check the following split flags 631 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 632 * 633 * @remarks As tu sizes are relative to CU sizes the producer has to 634 * make sure the correctness of u4_packed_tu_split_flags. 635 * 636 * @remarks au4_packed_tu_split_flags_cu[1]/[2]/[3] to be used only 637 * for 64x64 ctb. 638 */ 639 UWORD32 au4_packed_tu_split_flags_cu[4]; 640 641 /** 642 * pointer to first CU of CTB. Each CU need to be populated 643 * in CU order by calc. Total CUs in CTB is given by u1_num_cus_in_ctb 644 */ 645 cu_enc_loop_out_t *ps_enc_cu; 646 647 /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */ 648 UWORD8 u1_num_cus_in_ctb; 649 650 /** CTB neighbour availability flags */ 651 nbr_avail_flags_t s_ctb_nbr_avail_flags; 652 653 /* SAO parameters of the CTB */ 654 sao_enc_t s_sao; 655 656 } ctb_enc_loop_out_t; 657 658 /** 659 ****************************************************************************** 660 * @brief cu inter candidate for encoder 661 ****************************************************************************** 662 */ 663 typedef struct 664 { 665 /** base pu structure 666 * access valid number of entries in this array based on u1_part_size 667 */ 668 pu_t as_inter_pu[NUM_INTER_PU_PARTS]; 669 670 /* TU split flag : tu_split_flag[0] represents the transform splits 671 * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds 672 * to respective 32x32 */ 673 /* For a 8x8 TU - 1 bit used to indicate split */ 674 /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */ 675 /* For a 32x32 TU - See above */ 676 WORD32 ai4_tu_split_flag[4]; 677 678 /* TU split flag : tu_split_flag[0] represents the transform splits 679 * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds 680 * to respective 32x32 */ 681 /* For a 8x8 TU - 1 bit used to indicate split */ 682 /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */ 683 /* For a 32x32 TU - See above */ 684 WORD32 ai4_tu_early_cbf[4]; 685 686 /**Pointer to the buffer having predicted data after mc in SATD stage 687 * Since we have 2 buffers for each candidate pred data for best merge candidate 688 * can be in one of the 2 buffers. 689 */ 690 UWORD8 *pu1_pred_data; 691 692 UWORD16 *pu2_pred_data; 693 694 UWORD8 *pu1_pred_data_scr; 695 696 UWORD16 *pu2_pred_data_src; 697 698 /* Total cost: SATD cost + MV cost */ 699 WORD32 i4_total_cost; 700 701 /** Stride for predicted data*/ 702 WORD32 i4_pred_data_stride; 703 704 /** @remarks u1_part_size can be non square only for Inter */ 705 UWORD8 b3_part_size : 3; /* @sa: PART_SIZE_E */ 706 707 /** evaluate transform for cusize iff this flag is 1 */ 708 /** this flag should be set 0 if CU is 64x64 */ 709 UWORD8 b1_eval_tx_cusize : 1; 710 711 /** evaluate transform for cusize/2 iff this flag is 1 */ 712 UWORD8 b1_eval_tx_cusize_by2 : 1; 713 714 /** Skip Flag : ME should always set this 0 for the candidates */ 715 UWORD8 b1_skip_flag : 1; 716 717 UWORD8 b1_intra_has_won : 1; 718 719 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 720 /* if 1, this mode will be evaluated otherwise not.*/ 721 UWORD8 b1_eval_mark : 1; 722 723 } cu_inter_cand_t; 724 725 /** 726 ****************************************************************************** 727 * @brief cu intra candidate for encoder 728 ****************************************************************************** 729 */ 730 typedef struct 731 { 732 UWORD8 au1_intra_luma_mode_nxn_hash[NUM_PU_PARTS][MAX_INTRA_CANDIDATES]; 733 734 /** 735 * List of NxN PU candidates in CU for each partition 736 * valid only of if current cusize = mincusize 737 * +1 to signal the last flag invalid value of 255 needs to be stored 738 */ 739 UWORD8 au1_intra_luma_modes_nxn[NUM_PU_PARTS][(MAX_INTRA_CU_CANDIDATES * (4)) + 2 + 1]; 740 741 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 742 /* if 1, this mode will be evaluated otherwise not.*/ 743 UWORD8 au1_nxn_eval_mark[NUM_PU_PARTS][MAX_INTRA_CU_CANDIDATES + 1]; 744 745 /** 746 * List of 2Nx2N PU candidates in CU 747 * +1 to signal the last flag invalid value of 255 needs to be stored 748 */ 749 UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu[MAX_INTRA_CU_CANDIDATES + 1]; 750 751 /** 752 * List of 2Nx2N PU candidates in CU 753 * +1 to signal the last flag invalid value of 255 needs to be stored 754 */ 755 UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[MAX_INTRA_CU_CANDIDATES + 1]; 756 757 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 758 /* if 1, this mode will be evaluated otherwise not.*/ 759 UWORD8 au1_2nx2n_tu_eq_cu_eval_mark[MAX_INTRA_CU_CANDIDATES + 1]; 760 761 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 762 /* if 1, this mode will be evaluated otherwise not.*/ 763 UWORD8 au1_2nx2n_tu_eq_cu_by_2_eval_mark[MAX_INTRA_CU_CANDIDATES + 1]; 764 765 UWORD8 au1_num_modes_added[NUM_PU_PARTS]; 766 767 /** evaluate transform for cusize iff this flag is 1 */ 768 /** this flag should be set 0 if CU is 64x64 */ 769 UWORD8 b1_eval_tx_cusize : 1; 770 771 /** evaluate transform for cusize/2 iff this flag is 1 */ 772 UWORD8 b1_eval_tx_cusize_by2 : 1; 773 774 /** number of intra candidates for SATD evaluation in */ 775 UWORD8 b6_num_intra_cands : 6; 776 777 } cu_intra_cand_t; 778 779 /** 780 ****************************************************************************** 781 * @brief cu structure for mode analysis/evaluation 782 ****************************************************************************** 783 */ 784 typedef struct 785 { 786 /** CU X position in terms of min CU (8x8) units */ 787 UWORD8 b3_cu_pos_x : 3; 788 789 /** CU Y position in terms of min CU (8x8) units */ 790 UWORD8 b3_cu_pos_y : 3; 791 792 /** reserved bytes */ 793 UWORD8 b2_reserved : 2; 794 795 /** CU size 2N (width or height) in pixels */ 796 UWORD8 u1_cu_size; 797 798 /** Intra CU candidates after FAST CU decision (output of IPE) 799 * 8421 algo along with transform size evalution will 800 * be done for these modes in Encode loop pass. 801 */ 802 cu_intra_cand_t s_cu_intra_cand; 803 804 /** indicates the angular mode (0 - 34) for chroma, 805 * Note : No provision currently to take chroma through RDOPT or SATD 806 */ 807 UWORD8 u1_chroma_intra_pred_mode; 808 809 /** number of inter candidates in as_cu_inter_cand[] 810 * shall be 0 for intra frames. 811 * These inters are evaluated for RDOPT apart from merge/skip candidates 812 */ 813 UWORD8 u1_num_inter_cands; 814 815 /** List of candidates to be evalauted (SATD/RDOPT) for this CU 816 * @remarks : all merge/skip candidates not a part of this list 817 */ 818 cu_inter_cand_t as_cu_inter_cand[MAX_INTER_CU_CANDIDATES]; 819 820 WORD32 ai4_mv_cost[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS]; 821 822 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING 823 WORD32 ai4_err_metric[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS]; 824 #endif 825 826 /* Flag to convey if Inta or Inter is the best candidate among the 827 candidates populated 828 0: If inter is the winner and 1: if Intra is winner*/ 829 UWORD8 u1_best_is_intra; 830 831 /** number of intra rdopt candidates 832 * @remarks : shall be <= u1_num_intra_cands 833 */ 834 UWORD8 u1_num_intra_rdopt_cands; 835 /** qp used during for CU 836 * @remarks : 837 */ 838 WORD8 i1_cu_qp; 839 /** Activity factor used in pre enc thread for deriving the Qp 840 * @remarks : This is in Q format 841 */ 842 WORD32 i4_act_factor[4][2]; 843 844 } cu_analyse_t; 845 846 /** 847 ****************************************************************************** 848 * @brief Structure for CU recursion 849 ****************************************************************************** 850 */ 851 typedef struct cur_ctb_cu_tree_t 852 { 853 /** CU X position in terms of min CU (8x8) units */ 854 UWORD8 b3_cu_pos_x : 3; 855 856 /** CU X position in terms of min CU (8x8) units */ 857 UWORD8 b3_cu_pos_y : 3; 858 859 /** reserved bytes */ 860 UWORD8 b2_reserved : 2; 861 862 UWORD8 u1_cu_size; 863 864 UWORD8 u1_intra_eval_enable; 865 866 UWORD8 u1_inter_eval_enable; 867 868 /* Flag that indicates whether to evaluate this node */ 869 /* during RDOPT evaluation. This does not mean that */ 870 /* evaluation of the children need to be abandoned */ 871 UWORD8 is_node_valid; 872 873 LWORD64 i8_best_rdopt_cost; 874 875 struct cur_ctb_cu_tree_t *ps_child_node_tl; 876 877 struct cur_ctb_cu_tree_t *ps_child_node_tr; 878 879 struct cur_ctb_cu_tree_t *ps_child_node_bl; 880 881 struct cur_ctb_cu_tree_t *ps_child_node_br; 882 883 } cur_ctb_cu_tree_t; 884 885 typedef struct 886 { 887 WORD32 num_best_results; 888 889 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 890 891 } block_data_32x32_t; 892 893 /** 894 ****************************************************************************** 895 * @brief Structure for storing data about all the 64x64 896 * block in a 64x64 CTB 897 ****************************************************************************** 898 */ 899 typedef block_data_32x32_t block_data_64x64_t; 900 901 /** 902 ****************************************************************************** 903 * @brief Structure for storing data about all 16 16x16 904 * blocks in a 64x64 CTB and each of their partitions 905 ****************************************************************************** 906 */ 907 typedef struct 908 { 909 WORD32 num_best_results; 910 911 /** 912 * mask of active partitions, Totally 17 bits. For a given partition 913 * id, as per PART_ID_T enum the corresponding bit position is 1/0 914 * indicating that partition is active or inactive 915 */ 916 /*WORD32 i4_part_mask;*/ 917 918 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 919 920 } block_data_16x16_t; 921 922 typedef struct 923 { 924 WORD32 num_best_results; 925 926 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 927 } block_data_8x8_t; 928 929 /** 930 ****************************************************************************** 931 * @brief Structure for data export from ME to Enc_Loop 932 ****************************************************************************** 933 */ 934 typedef struct 935 { 936 block_data_8x8_t as_8x8_block_data[64]; 937 938 block_data_16x16_t as_block_data[16]; 939 940 block_data_32x32_t as_32x32_block_data[4]; 941 942 block_data_64x64_t s_64x64_block_data; 943 944 } me_ctb_data_t; 945 946 /** 947 ****************************************************************************** 948 * @brief noise detection related structure 949 * 950 ****************************************************************************** 951 */ 952 953 typedef struct 954 { 955 WORD32 i4_noise_present; 956 957 UWORD8 au1_is_8x8Blk_noisy[MAX_CU_IN_CTB]; 958 959 UWORD32 au4_variance_src_16x16[MAX_CU_IN_CTB]; 960 } ihevce_ctb_noise_params; 961 962 /** 963 ****************************************************************************** 964 * @brief ctb structure for mode analysis/evaluation 965 ****************************************************************************** 966 */ 967 typedef struct 968 { 969 /** 970 * CU decision in a ctb is frozen by ME/IPE and populated in 971 * u4_packed_cu_split_flags. 972 * @remarks 973 * TODO:review comment 974 * bit0 : 64x64 split flag, (depth0 flag for 64x64 ctb unused for smaller ctb) 975 * bits 1-3 : not used 976 * bits 4-7 : 32x32 split flags; (depth1 flags for 64x64ctb / only bit4 used for 32x32ctb) 977 * bits 8-23: 16x16 split flags; (depth2 flags for 64x64 / depth1[bits8-11] for 32x32 [bit8 for ctb 16x16] ) 978 979 * if a split flag of n is set for depth 1, check the following split flags 980 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 981 * 982 */ 983 UWORD32 u4_cu_split_flags; 984 985 UWORD8 u1_num_cus_in_ctb; 986 987 cur_ctb_cu_tree_t *ps_cu_tree; 988 989 me_ctb_data_t *ps_me_ctb_data; 990 991 ihevce_ctb_noise_params s_ctb_noise_params; 992 993 } ctb_analyse_t; 994 /** 995 ****************************************************************************** 996 * @brief Structures for tapping ssd and bit-estimate information for all CUs 997 ****************************************************************************** 998 */ 999 1000 typedef struct 1001 { 1002 LWORD64 i8_cost; 1003 WORD32 i4_idx; 1004 } cost_idx_t; 1005 1006 /** 1007 ****************************************************************************** 1008 * @brief reference/non reference pic context for encoder 1009 ****************************************************************************** 1010 */ 1011 typedef struct 1012 1013 { 1014 /** 1015 * YUV buffer discriptor for the recon 1016 * Allocation per frame for Y = ((ALIGN(frame width, MAX_CTB_SIZE)) + 2 * PAD_HORZ)* 1017 * ((ALIGN(frame height, MAX_CTB_SIZE)) + 2 * PAD_VERT) 1018 */ 1019 iv_enc_yuv_buf_t s_yuv_buf_desc; 1020 1021 iv_enc_yuv_buf_src_t s_yuv_buf_desc_src; 1022 1023 /* Pointer to Luma (Y) sub plane buffers Horz/ Vert / HV grid */ 1024 /* When (L0ME_IN_OPENLOOP_MODE == 1), additional buffer required to store */ 1025 /* the fullpel plane for use as reference */ 1026 UWORD8 *apu1_y_sub_pel_planes[3 + L0ME_IN_OPENLOOP_MODE]; 1027 1028 /** 1029 * frm level pointer to pu bank for colocated mv access 1030 * Allocation per frame = (ALIGN(frame width, MAX_CTB_SIZE) / MIN_PU_SIZE) * 1031 * (ALIGN(frame height, MAX_CTB_SIZE) / MIN_PU_SIZE) 1032 */ 1033 pu_col_mv_t *ps_frm_col_mv; 1034 /** 1035 ************************************************************************ 1036 * Pointer to a PU map stored at frame level, 1037 * It contains a 7 bit pu index in encoder order w.r.t to a ctb at a min 1038 * granularirty of MIN_PU_SIZE size. 1039 ************************************************************************ 1040 */ 1041 UWORD8 *pu1_frm_pu_map; 1042 1043 /** CTB level frame buffer to store the accumulated sum of 1044 * number of PUs for every row */ 1045 UWORD16 *pu2_num_pu_map; 1046 1047 /** Offsets in the PU buffer at every CTB level */ 1048 UWORD32 *pu4_pu_off; 1049 1050 /** Collocated POC for reference list 0 1051 * ToDo: Change the array size when multiple slices are to be supported */ 1052 WORD32 ai4_col_l0_poc[HEVCE_MAX_REF_PICS]; 1053 1054 /** Collocated POC for reference list 1 */ 1055 WORD32 ai4_col_l1_poc[HEVCE_MAX_REF_PICS]; 1056 1057 /** 0 = top field, 1 = bottom field */ 1058 WORD32 i4_bottom_field; 1059 1060 /** top field first input in case of interlaced case */ 1061 WORD32 i4_topfield_first; 1062 1063 /** top field first input in case of interlaced case */ 1064 WORD32 i4_poc; 1065 1066 /** unique buffer id */ 1067 WORD32 i4_buf_id; 1068 1069 /** is this reference frame or not */ 1070 WORD32 i4_is_reference; 1071 1072 /** Picture type of current picture */ 1073 WORD32 i4_pic_type; 1074 1075 /** Flag to indicate whether current pictute is free or in use */ 1076 WORD32 i4_is_free; 1077 1078 /** Bit0 - of this Flag to indicate whether current pictute needs to be deblocked, 1079 padded and hpel planes need to be generated. 1080 These are turned off typically in non referecne pictures when psnr 1081 and recon dump is disabled. 1082 1083 Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled 1084 */ 1085 WORD32 i4_deblk_pad_hpel_cur_pic; 1086 1087 /** 1088 * weight and offset for this ref pic. To be initialized for every pic 1089 * based on the lap output 1090 */ 1091 ihevce_wght_offst_t s_weight_offset; 1092 1093 /** 1094 * Reciprocal of the lumaweight in q15 format 1095 */ 1096 WORD32 i4_inv_luma_wt; 1097 1098 /** 1099 * Log to base 2 of the common denominator used for luma weights across all ref pics 1100 */ 1101 WORD32 i4_log2_wt_denom; 1102 1103 /** 1104 * Used as Reference for encoding current picture flag 1105 */ 1106 WORD32 i4_used_by_cur_pic_flag; 1107 1108 #if ADAPT_COLOCATED_FROM_L0_FLAG 1109 WORD32 i4_frame_qp; 1110 #endif 1111 /* 1112 * IDR GOP number 1113 */ 1114 1115 WORD32 i4_idr_gop_num; 1116 1117 /* 1118 * non-ref-free_flag 1119 */ 1120 WORD32 i4_non_ref_free_flag; 1121 /** 1122 * Dependency manager instance for ME - Prev recon dep 1123 */ 1124 void *pv_dep_mngr_recon; 1125 1126 /*display num*/ 1127 WORD32 i4_display_num; 1128 } recon_pic_buf_t; 1129 1130 /** 1131 ****************************************************************************** 1132 * @brief Lambda values used for various cost computations 1133 ****************************************************************************** 1134 */ 1135 typedef struct 1136 { 1137 /************************************************************************/ 1138 /* The fields with the string 'type2' in their names are required */ 1139 /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */ 1140 /* to the bit_depth != internal_bit_depth are stored in these fields */ 1141 /************************************************************************/ 1142 1143 /** 1144 * Closed loop SSD Lambda 1145 * This is multiplied with bits for RD cost computations in SSD mode 1146 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1147 */ 1148 LWORD64 i8_cl_ssd_lambda_qf; 1149 1150 LWORD64 i8_cl_ssd_type2_lambda_qf; 1151 1152 /** 1153 * Closed loop SSD Lambda for chroma residue (chroma qp is different from luma qp) 1154 * This is multiplied with bits for RD cost computations in SSD mode 1155 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1156 */ 1157 LWORD64 i8_cl_ssd_lambda_chroma_qf; 1158 1159 LWORD64 i8_cl_ssd_type2_lambda_chroma_qf; 1160 1161 /** 1162 * Closed loop SAD Lambda 1163 * This is multiplied with bits for RD cost computations in SAD mode 1164 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1165 */ 1166 WORD32 i4_cl_sad_lambda_qf; 1167 1168 WORD32 i4_cl_sad_type2_lambda_qf; 1169 1170 /** 1171 * Open loop SAD Lambda 1172 * This is multiplied with bits for RD cost computations in SAD mode 1173 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1174 */ 1175 WORD32 i4_ol_sad_lambda_qf; 1176 1177 WORD32 i4_ol_sad_type2_lambda_qf; 1178 1179 /** 1180 * Closed loop SATD Lambda 1181 * This is multiplied with bits for RD cost computations in SATD mode 1182 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1183 */ 1184 WORD32 i4_cl_satd_lambda_qf; 1185 1186 WORD32 i4_cl_satd_type2_lambda_qf; 1187 1188 /** 1189 * Open loop SATD Lambda 1190 * This is multiplied with bits for RD cost computations in SATD mode 1191 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1192 */ 1193 WORD32 i4_ol_satd_lambda_qf; 1194 1195 WORD32 i4_ol_satd_type2_lambda_qf; 1196 1197 double lambda_modifier; 1198 1199 double lambda_uv_modifier; 1200 1201 UWORD32 u4_chroma_cost_weighing_factor; 1202 1203 } frm_lambda_ctxt_t; 1204 /** 1205 ****************************************************************************** 1206 * @brief Mode attributes for 4x4 block populated by early decision 1207 ****************************************************************************** 1208 */ 1209 typedef struct 1210 { 1211 /* If best mode is present or not */ 1212 UWORD8 mode_present; 1213 1214 /** Best mode for the current 4x4 prediction block */ 1215 UWORD8 best_mode; 1216 1217 /** sad for the best mode for the current 4x4 prediction block */ 1218 UWORD16 sad; 1219 1220 /** cost for the best mode for the current 4x4 prediction block */ 1221 UWORD16 sad_cost; 1222 1223 } ihevce_ed_mode_attr_t; //early decision 1224 /** 1225 ****************************************************************************** 1226 * @brief Structure at 8x8 block level which has parameters such as cur satd 1227 * for QP mod @ L0 level 1228 ****************************************************************************** 1229 */ 1230 typedef struct 1231 { 1232 /*Store SATD of current data at 8*8 level for current layer (L0)*/ 1233 WORD32 i4_8x8_cur_satd; 1234 } ihevce_8x8_L0_satd_t; 1235 /** 1236 ****************************************************************************** 1237 * @brief Structure at 8x8 block level mean for MEAN based QP mod 1238 ****************************************************************************** 1239 */ 1240 typedef struct 1241 { 1242 /*Store SATD of current data at 8*8 level for current layer (L0)*/ 1243 WORD16 i2_8x8_cur_mean; 1244 } ihevce_8x8_L0_mean_t; 1245 1246 //#define DEBUG_ED_CTB_POS 1247 /** 1248 ****************************************************************************** 1249 * @brief Structure at 4x4 block level which has parameters about early 1250 * intra or inter decision 1251 ****************************************************************************** 1252 */ 1253 typedef struct 1254 { 1255 /** 1256 * Final parameter of Intra-Inter early decision for the current 4x4. 1257 * 0 - invalid decision 1258 * 1 - eval intra only 1259 * 2 - eval inter only 1260 * 3 - eval both intra and inter 1261 */ 1262 UWORD8 intra_or_inter : 2; 1263 1264 UWORD8 merge_success : 1; 1265 1266 /** Best mode for the current 4x4 prediction block */ 1267 UWORD8 best_mode; 1268 1269 /* sad cost for the best prediction mode */ 1270 //UWORD16 best_sad_cost; 1271 1272 /** Best mode for the current 4x4 prediction block */ 1273 UWORD8 best_merge_mode; 1274 1275 /*Store SATD at 4*4 level for current layer (L1)*/ 1276 WORD32 i4_4x4_satd; 1277 1278 /*Store SATD of current data at 4*4 level for current layer (L1)*/ 1279 WORD32 i4_4x4_cur_satd; 1280 1281 } ihevce_ed_blk_t; //early decision 1282 1283 /* l1 ipe ctb analyze structure */ 1284 /* Contains cu level qp mod related information for all possible cu 1285 sizes (16,32,64 in L0) in a CTB*/ 1286 typedef struct 1287 { 1288 WORD32 i4_sum_4x4_satd[16]; 1289 WORD32 i4_min_4x4_satd[16]; 1290 1291 /*satd for L1_8x8 blocks in L1_32x32 1292 16 - num L1_8x8 in L1_32x32 1293 2 => 1294 0 - sum of L1_4x4 @ L1_8x8 1295 - equivalent to transform size of 16x16 @ L0 1296 1 - min/median of L1_4x4 @ L1_8x8 1297 - equivalent to transform size of 8x8 @ L0 1298 */ 1299 WORD32 i4_8x8_satd[16][2]; 1300 1301 /*satd for L1_16x16 blocks in L1_32x32 1302 4 - num L1_16x16 in L1_32x32 1303 3 => 1304 0 - sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 1305 - equivalent to transform size of 32x32 @ L0 1306 1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 1307 - equivalent to transform size of 16x16 @ L0 1308 2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_16x16 1309 - equivalent to transform size of 8x8 @ L0 1310 */ 1311 WORD32 i4_16x16_satd[4][3]; 1312 1313 /*satd for 32x32 block in L1*/ 1314 /*Please note that i4_32x32_satd[0][3] contains sum of all 32x32 */ 1315 /*satd for L1_32x32 blocks in L1_32x32 1316 1 - num L1_32x32 in L1_32x32 1317 4 => 1318 0 - min/median of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32 1319 - equivalent to transform size of 32x32 @ L0 1320 1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_32x32 1321 - equivalent to transform size of 16x16 @ L0 1322 2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_32x32 1323 - equivalent to transform size of 8x8 @ L0 1324 3 - sum of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32 1325 */ 1326 WORD32 i4_32x32_satd[1][4]; 1327 1328 /*Store SATD at 8x8 level for current layer (L1)*/ 1329 WORD32 i4_best_satd_8x8[16]; 1330 1331 /* EIID: This will be used for early inter intra decisions */ 1332 /*SAD at 8x8 level for current layer (l1) */ 1333 /*Cost based on sad at 8x8 level for current layer (l1) */ 1334 WORD32 i4_best_sad_cost_8x8_l1_ipe[16]; 1335 1336 WORD32 i4_best_sad_8x8_l1_ipe[16]; 1337 /* SAD at 8x8 level for ME. All other cost are IPE cost */ 1338 WORD32 i4_best_sad_cost_8x8_l1_me[16]; 1339 1340 /* SAD at 8x8 level for ME. for given reference */ 1341 WORD32 i4_sad_cost_me_for_ref[16]; 1342 1343 /* SAD at 8x8 level for ME. for given reference */ 1344 WORD32 i4_sad_me_for_ref[16]; 1345 1346 /* SAD at 8x8 level for ME. All other cost are IPE cost */ 1347 WORD32 i4_best_sad_8x8_l1_me[16]; 1348 1349 WORD32 i4_best_sad_8x8_l1_me_for_decide[16]; 1350 1351 /*Mean @ L0 16x16*/ 1352 WORD32 ai4_16x16_mean[16]; 1353 1354 /*Mean @ L0 32x32*/ 1355 WORD32 ai4_32x32_mean[4]; 1356 1357 /*Mean @ L0 64x64*/ 1358 WORD32 i4_64x64_mean; 1359 1360 } ihevce_ed_ctb_l1_t; //early decision 1361 1362 /** 1363 ****************************************************************************** 1364 * @brief 8x8 Intra analyze structure 1365 ****************************************************************************** 1366 */ 1367 typedef struct 1368 { 1369 /** Best intra modes for 8x8 transform. 1370 * Insert 255 in the end to limit number of modes 1371 */ 1372 UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1373 1374 /** Best 8x8 intra modes for 4x4 transform 1375 * Insert 255 in the end to limit number of modes 1376 */ 1377 UWORD8 au1_best_modes_4x4_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1378 1379 /** Best 4x4 intra modes 1380 * Insert 255 in the end to limit number of modes 1381 */ 1382 UWORD8 au1_4x4_best_modes[4][MAX_INTRA_CU_CANDIDATES + 1]; 1383 1384 /** best 8x8 intra sad/SATD cost */ 1385 WORD32 i4_best_intra_cost; 1386 1387 /** flag to indicate if nxn pu mode (different pu at 4x4 level) is enabled */ 1388 UWORD8 b1_enable_nxn : 1; 1389 1390 /** valid cu flag : required for incomplete ctbs at frame boundaries */ 1391 UWORD8 b1_valid_cu : 1; 1392 1393 /** dummy bits */ 1394 UWORD8 b6_reserved : 6; 1395 1396 } intra8_analyse_t; 1397 1398 /** 1399 ****************************************************************************** 1400 * @brief 16x16 Intra analyze structure 1401 ****************************************************************************** 1402 */ 1403 typedef struct 1404 { 1405 /** Best intra modes for 16x16 transform. 1406 * Insert 255 in the end to limit number of modes 1407 */ 1408 UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1409 1410 /** Best 16x16 intra modes for 8x8 transform 1411 * Insert 255 in the end to limit number of modes 1412 */ 1413 UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1414 1415 /** 8x8 children intra analyze for this 16x16 */ 1416 intra8_analyse_t as_intra8_analyse[4]; 1417 1418 /* best 16x16 intra sad/SATD cost */ 1419 WORD32 i4_best_intra_cost; 1420 1421 /* indicates if 16x16 is best cu or 8x8 cu */ 1422 UWORD8 b1_split_flag : 1; 1423 1424 /* indicates if 8x8 vs 16x16 rdo evaluation needed */ 1425 /* or only 8x8's rdo evaluation needed */ 1426 UWORD8 b1_merge_flag : 1; 1427 1428 /** 1429 * valid cu flag : required for incomplete ctbs at frame boundaries 1430 * or if CTB size is lower than 32 1431 */ 1432 UWORD8 b1_valid_cu : 1; 1433 1434 /** dummy bits */ 1435 UWORD8 b6_reserved : 5; 1436 1437 } intra16_analyse_t; 1438 1439 /** 1440 ****************************************************************************** 1441 * @brief 32x32 Intra analyze structure 1442 ****************************************************************************** 1443 */ 1444 typedef struct 1445 { 1446 /** Best intra modes for 32x32 transform. 1447 * Insert 255 in the end to limit number of modes 1448 */ 1449 UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1450 1451 /** Best 32x32 intra modes for 16x16 transform 1452 * Insert 255 in the end to limit number of modes 1453 */ 1454 UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1455 1456 /** 16x16 children intra analyze for this 32x32 */ 1457 intra16_analyse_t as_intra16_analyse[4]; 1458 1459 /* best 32x32 intra sad/SATD cost */ 1460 WORD32 i4_best_intra_cost; 1461 1462 /* indicates if 32x32 is best cu or 16x16 cu */ 1463 UWORD8 b1_split_flag : 1; 1464 1465 /* indicates if 32x32 vs 16x16 rdo evaluation needed */ 1466 /* or 16x16 vs 8x8 evaluation is needed */ 1467 UWORD8 b1_merge_flag : 1; 1468 1469 /** 1470 * valid cu flag : required for incomplete ctbs at frame boundaries 1471 * or if CTB size is lower than 64 1472 */ 1473 UWORD8 b1_valid_cu : 1; 1474 1475 /** dummy bits */ 1476 UWORD8 b6_reserved : 5; 1477 1478 } intra32_analyse_t; 1479 1480 /** 1481 ****************************************************************************** 1482 * @brief IPE L0 analyze structure for L0 ME to do intra/inter CU decisions 1483 * This is a CTB level structure encapsulating IPE modes, cost at all 1484 * level. IPE also recommemds max intra CU sizes which is required 1485 * by ME for CU size determination in intra dominant CTB 1486 ****************************************************************************** 1487 */ 1488 typedef struct 1489 { 1490 /** Best 64x64 intra modes for 32x32 transform. 1491 * Insert 255 in the end to limit number of modes 1492 */ 1493 UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1494 1495 /** 32x32 children intra analyze for this 32x32 */ 1496 intra32_analyse_t as_intra32_analyse[4]; 1497 1498 /* indicates if 64x64 is best CUs or 32x32 CUs */ 1499 UWORD8 u1_split_flag; 1500 1501 /* CTB level best 8x8 intra costs */ 1502 WORD32 ai4_best8x8_intra_cost[MAX_CU_IN_CTB]; 1503 1504 /* CTB level best 16x16 intra costs */ 1505 WORD32 ai4_best16x16_intra_cost[MAX_CU_IN_CTB >> 2]; 1506 1507 /* CTB level best 32x32 intra costs */ 1508 WORD32 ai4_best32x32_intra_cost[MAX_CU_IN_CTB >> 4]; 1509 1510 /* best 64x64 intra cost */ 1511 WORD32 i4_best64x64_intra_cost; 1512 1513 /** 1514 * CTB level early intra / inter decision at 8x8 block level 1515 * 0 - invalid decision 1516 * 1 - eval intra only 1517 * 2 - eval inter only 1518 * 3 - eval both intra and inter 1519 */ 1520 /* Z scan format */ 1521 WORD8 ai1_early_intra_inter_decision[MAX_CU_IN_CTB]; 1522 1523 /* 1524 @L0 level 1525 4 => 0 - 32x32 TU in 64x64 CU 1526 1 - 16x16 TU in 64x64 CU 1527 2 - 8x8 TU in 64x64 CU 1528 3 - 64x64 CU 1529 2 => Intra/Inter */ 1530 WORD32 i4_64x64_act_factor[4][2]; 1531 1532 /* 1533 @L0 level 1534 4 => num 32x32 in CTB 1535 3 => 0 - 32x32 TU in 64x64 CU 1536 1 - 16x16 TU in 64x64 CU 1537 2 - 8x8 TU in 64x64 CU 1538 2 => Intra/Inter */ 1539 WORD32 i4_32x32_act_factor[4][3][2]; 1540 1541 /* 1542 @L0 level 1543 16 => num 16x16 in CTB 1544 2 => 0 - 16x16 TU in 64x64 CU 1545 1 - 8x8 TU in 64x64 CU 1546 2 => Intra/Inter */ 1547 WORD32 i4_16x16_act_factor[16][2][2]; 1548 1549 WORD32 nodes_created_in_cu_tree; 1550 1551 cur_ctb_cu_tree_t *ps_cu_tree_root; 1552 1553 WORD32 ai4_8x8_act_factor[16]; 1554 WORD32 ai4_best_sad_8x8_l1_me[MAX_CU_IN_CTB]; 1555 WORD32 ai4_best_sad_8x8_l1_ipe[MAX_CU_IN_CTB]; 1556 WORD32 ai4_best_sad_cost_8x8_l1_me[MAX_CU_IN_CTB]; 1557 WORD32 ai4_best_sad_cost_8x8_l1_ipe[MAX_CU_IN_CTB]; 1558 1559 /*Ctb level accumalated satd*/ 1560 WORD32 i4_ctb_acc_satd; 1561 1562 /*Ctb level accumalated mpm bits*/ 1563 WORD32 i4_ctb_acc_mpm_bits; 1564 1565 } ipe_l0_ctb_analyse_for_me_t; 1566 1567 typedef struct 1568 { 1569 WORD16 i2_mv_x; 1570 WORD16 i2_mv_y; 1571 } global_mv_t; 1572 1573 /** 1574 ****************************************************************************** 1575 * @brief Pre Encode pass and ME pass shared variables and buffers 1576 ****************************************************************************** 1577 */ 1578 typedef struct 1579 { 1580 /** 1581 * Buffer id 1582 */ 1583 WORD32 i4_buf_id; 1584 1585 /** 1586 * Flag will be set to 1 by frame processing thread after receiving flush 1587 * command from application 1588 */ 1589 WORD32 i4_end_flag; 1590 1591 /** frame leve ctb analyse buffer pointer */ 1592 ctb_analyse_t *ps_ctb_analyse; 1593 1594 /** frame level cu analyse buffer pointer for IPE */ 1595 //cu_analyse_t *ps_cu_analyse; 1596 1597 /** current input pointer */ 1598 ihevce_lap_enc_buf_t *ps_curr_inp; 1599 1600 /** current inp buffer id */ 1601 WORD32 curr_inp_buf_id; 1602 1603 /** Slice header parameters */ 1604 slice_header_t s_slice_hdr; 1605 1606 /** sps parameters activated by current slice */ 1607 sps_t *ps_sps; 1608 1609 /** pps parameters activated by current slice */ 1610 pps_t *ps_pps; 1611 1612 /** vps parameters activated by current slice */ 1613 vps_t *ps_vps; 1614 /** Pointer to Penultilate Layer context memory internally has MV bank buff and related params */ 1615 void *pv_me_lyr_ctxt; 1616 1617 /** Pointer to Penultilate Layer NV bank context memory */ 1618 void *pv_me_lyr_bnk_ctxt; 1619 1620 /** Pointer to Penultilate Layer MV bank buff */ 1621 void *pv_me_mv_bank; 1622 1623 /** Pointer to Penultilate Layer reference idx buffer */ 1624 void *pv_me_ref_idx; 1625 /** 1626 * Array to store 8x8 cost (partial 8x8 sad + level adjusted cost) 1627 * The order of storing is raster scan order within CTB and 1628 * CTB order is raster scan within frame. 1629 */ 1630 double *plf_intra_8x8_cost; 1631 1632 /** 1633 * L0 layer ctb anaylse frame level buffer. 1634 * IPE wil populate the cost and best modes at all levels in this buffer 1635 * for every CTB in a frame 1636 */ 1637 // moved to shorter buffer queue 1638 //ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb; 1639 1640 /** Layer L1 buffer pointer */ 1641 ihevce_ed_blk_t *ps_layer1_buf; 1642 1643 /** Layer L2 buffer pointer */ 1644 ihevce_ed_blk_t *ps_layer2_buf; 1645 1646 /*ME reverse map info*/ 1647 UWORD8 *pu1_me_reverse_map_info; 1648 1649 /** Buffer pointer for CTB level information in pre intra pass*/ 1650 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1; 1651 1652 /* L0 cur 8x8 satd for QP mod*/ 1653 ihevce_8x8_L0_satd_t *ps_layer0_cur_satd; 1654 1655 /* L0 cur 8x8 mean for QP mod*/ 1656 ihevce_8x8_L0_mean_t *ps_layer0_cur_mean; 1657 1658 /** vps parameters activated by current slice */ 1659 sei_params_t s_sei; 1660 1661 /** nal_type for the slice to be encoded */ 1662 WORD32 i4_slice_nal_type; 1663 1664 /** input time stamp in terms of ticks: lower 32 */ 1665 WORD32 i4_inp_timestamp_low; 1666 1667 /** input time stamp in terms of ticks: higher 32 */ 1668 WORD32 i4_inp_timestamp_high; 1669 1670 /** input frame ctxt of app to be retured in output buffer */ 1671 void *pv_app_frm_ctxt; 1672 1673 /** current frm valid flag : 1674 * will be 1 if valid input was processed by frame proc thrd 1675 */ 1676 WORD32 i4_frm_proc_valid_flag; 1677 1678 /** 1679 * Qp to be used for current frame 1680 */ 1681 WORD32 i4_curr_frm_qp; 1682 1683 /** 1684 * Frame level Lambda parameters 1685 */ 1686 frm_lambda_ctxt_t as_lambda_prms[IHEVCE_MAX_NUM_BITRATES]; 1687 1688 /** Frame-levelSATDcost accumalator */ 1689 LWORD64 i8_frame_acc_satd_cost; 1690 1691 /** Frame - L1 coarse me cost accumulated */ 1692 LWORD64 i8_acc_frame_coarse_me_cost; 1693 /** Frame - L1 coarse me cost accumulated */ 1694 //LWORD64 i8_acc_frame_coarse_me_cost_for_ref; 1695 1696 /** Frame - L1 coarse me sad accumulated */ 1697 LWORD64 i8_acc_frame_coarse_me_sad; 1698 1699 /* Averge activity of 4x4 blocks from previous frame 1700 * If L1, maps to 8*8 in L0 1701 */ 1702 WORD32 i4_curr_frame_4x4_avg_act; 1703 1704 WORD32 ai4_mod_factor_derived_by_variance[2]; 1705 1706 float f_strength; 1707 1708 /* Averge activity of 8x8 blocks from previous frame 1709 * If L1, maps to 16*16 in L0 1710 */ 1711 1712 long double ld_curr_frame_8x8_log_avg[2]; 1713 1714 LWORD64 i8_curr_frame_8x8_avg_act[2]; 1715 1716 LWORD64 i8_curr_frame_8x8_sum_act[2]; 1717 1718 WORD32 i4_curr_frame_8x8_sum_act_for_strength[2]; 1719 1720 ULWORD64 u8_curr_frame_8x8_sum_act_sqr; 1721 1722 WORD32 i4_curr_frame_8x8_num_blks[2]; 1723 1724 LWORD64 i8_acc_frame_8x8_sum_act[2]; 1725 LWORD64 i8_acc_frame_8x8_sum_act_sqr; 1726 WORD32 i4_acc_frame_8x8_num_blks[2]; 1727 LWORD64 i8_acc_frame_8x8_sum_act_for_strength; 1728 LWORD64 i8_curr_frame_8x8_sum_act_for_strength; 1729 1730 /* Averge activity of 16x16 blocks from previous frame 1731 * If L1, maps to 32*32 in L0 1732 */ 1733 1734 long double ld_curr_frame_16x16_log_avg[3]; 1735 1736 LWORD64 i8_curr_frame_16x16_avg_act[3]; 1737 1738 LWORD64 i8_curr_frame_16x16_sum_act[3]; 1739 1740 WORD32 i4_curr_frame_16x16_num_blks[3]; 1741 1742 LWORD64 i8_acc_frame_16x16_sum_act[3]; 1743 WORD32 i4_acc_frame_16x16_num_blks[3]; 1744 1745 /* Averge activity of 32x32 blocks from previous frame 1746 * If L1, maps to 64*64 in L0 1747 */ 1748 1749 long double ld_curr_frame_32x32_log_avg[3]; 1750 1751 LWORD64 i8_curr_frame_32x32_avg_act[3]; 1752 1753 global_mv_t s_global_mv[MAX_NUM_REF]; 1754 LWORD64 i8_curr_frame_32x32_sum_act[3]; 1755 1756 WORD32 i4_curr_frame_32x32_num_blks[3]; 1757 1758 LWORD64 i8_acc_frame_32x32_sum_act[3]; 1759 WORD32 i4_acc_frame_32x32_num_blks[3]; 1760 1761 LWORD64 i8_acc_num_blks_high_sad; 1762 1763 LWORD64 i8_total_blks; 1764 1765 WORD32 i4_complexity_percentage; 1766 1767 WORD32 i4_is_high_complex_region; 1768 1769 WORD32 i4_avg_noise_thrshld_4x4; 1770 1771 LWORD64 i8_curr_frame_mean_sum; 1772 WORD32 i4_curr_frame_mean_num_blks; 1773 LWORD64 i8_curr_frame_avg_mean_act; 1774 1775 } pre_enc_me_ctxt_t; 1776 1777 /** 1778 ****************************************************************************** 1779 * @brief buffers from L0 IPE to ME and enc loop 1780 ****************************************************************************** 1781 */ 1782 typedef struct 1783 { 1784 WORD32 i4_size; 1785 1786 ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb; 1787 } pre_enc_L0_ipe_encloop_ctxt_t; 1788 /** 1789 ****************************************************************************** 1790 * @brief Frame process and Entropy coding pass shared variables and buffers 1791 ****************************************************************************** 1792 */ 1793 1794 typedef struct 1795 { 1796 /*PIC level Info*/ 1797 ULWORD64 i8_total_cu; 1798 ULWORD64 i8_total_cu_min_8x8; 1799 ULWORD64 i8_total_pu; 1800 ULWORD64 i8_total_intra_cu; 1801 ULWORD64 i8_total_inter_cu; 1802 ULWORD64 i8_total_skip_cu; 1803 ULWORD64 i8_total_cu_based_on_size[4]; 1804 1805 ULWORD64 i8_total_intra_pu; 1806 ULWORD64 i8_total_merge_pu; 1807 ULWORD64 i8_total_non_skipped_inter_pu; 1808 1809 ULWORD64 i8_total_2nx2n_intra_pu[4]; 1810 ULWORD64 i8_total_nxn_intra_pu; 1811 ULWORD64 i8_total_2nx2n_inter_pu[4]; 1812 ULWORD64 i8_total_smp_inter_pu[4]; 1813 ULWORD64 i8_total_amp_inter_pu[3]; 1814 ULWORD64 i8_total_nxn_inter_pu[3]; 1815 1816 ULWORD64 i8_total_L0_mode; 1817 ULWORD64 i8_total_L1_mode; 1818 ULWORD64 i8_total_BI_mode; 1819 1820 ULWORD64 i8_total_L0_ref_idx[MAX_DPB_SIZE]; 1821 ULWORD64 i8_total_L1_ref_idx[MAX_DPB_SIZE]; 1822 1823 ULWORD64 i8_total_tu; 1824 ULWORD64 i8_total_non_coded_tu; 1825 ULWORD64 i8_total_inter_coded_tu; 1826 ULWORD64 i8_total_intra_coded_tu; 1827 1828 ULWORD64 i8_total_tu_based_on_size[4]; 1829 ULWORD64 i8_total_tu_cu64[4]; 1830 ULWORD64 i8_total_tu_cu32[4]; 1831 ULWORD64 i8_total_tu_cu16[3]; 1832 ULWORD64 i8_total_tu_cu8[2]; 1833 1834 LWORD64 i8_total_qp; 1835 LWORD64 i8_total_qp_min_cu; 1836 WORD32 i4_min_qp; 1837 WORD32 i4_max_qp; 1838 LWORD64 i8_sum_squared_frame_qp; 1839 LWORD64 i8_total_frame_qp; 1840 WORD32 i4_max_frame_qp; 1841 float f_total_buffer_underflow; 1842 float f_total_buffer_overflow; 1843 float f_max_buffer_underflow; 1844 float f_max_buffer_overflow; 1845 1846 UWORD8 i1_num_ref_idx_l0_active; 1847 UWORD8 i1_num_ref_idx_l1_active; 1848 1849 WORD32 i4_ref_poc_l0[MAX_DPB_SIZE]; 1850 WORD32 i4_ref_poc_l1[MAX_DPB_SIZE]; 1851 1852 WORD8 i1_list_entry_l0[MAX_DPB_SIZE]; 1853 DOUBLE i2_luma_weight_l0[MAX_DPB_SIZE]; 1854 WORD16 i2_luma_offset_l0[MAX_DPB_SIZE]; 1855 WORD8 i1_list_entry_l1[MAX_DPB_SIZE]; 1856 DOUBLE i2_luma_weight_l1[MAX_DPB_SIZE]; 1857 WORD16 i2_luma_offset_l1[MAX_DPB_SIZE]; 1858 1859 ULWORD64 u8_bits_estimated_intra; 1860 ULWORD64 u8_bits_estimated_inter; 1861 ULWORD64 u8_bits_estimated_slice_header; 1862 ULWORD64 u8_bits_estimated_sao; 1863 ULWORD64 u8_bits_estimated_split_cu_flag; 1864 ULWORD64 u8_bits_estimated_cu_hdr_bits; 1865 ULWORD64 u8_bits_estimated_split_tu_flag; 1866 ULWORD64 u8_bits_estimated_qp_delta_bits; 1867 ULWORD64 u8_bits_estimated_cbf_luma_bits; 1868 ULWORD64 u8_bits_estimated_cbf_chroma_bits; 1869 1870 ULWORD64 u8_bits_estimated_res_luma_bits; 1871 ULWORD64 u8_bits_estimated_res_chroma_bits; 1872 1873 ULWORD64 u8_bits_estimated_ref_id; 1874 ULWORD64 u8_bits_estimated_mvd; 1875 ULWORD64 u8_bits_estimated_merge_flag; 1876 ULWORD64 u8_bits_estimated_mpm_luma; 1877 ULWORD64 u8_bits_estimated_mpm_chroma; 1878 1879 ULWORD64 u8_total_bits_generated; 1880 ULWORD64 u8_total_bits_vbv; 1881 1882 ULWORD64 u8_total_I_bits_generated; 1883 ULWORD64 u8_total_P_bits_generated; 1884 ULWORD64 u8_total_B_bits_generated; 1885 1886 UWORD32 u4_frame_sad; 1887 UWORD32 u4_frame_intra_sad; 1888 UWORD32 u4_frame_inter_sad; 1889 1890 ULWORD64 i8_frame_cost; 1891 ULWORD64 i8_frame_intra_cost; 1892 ULWORD64 i8_frame_inter_cost; 1893 } s_pic_level_acc_info_t; 1894 1895 typedef struct 1896 { 1897 UWORD32 u4_target_bit_rate_sei_entropy; 1898 UWORD32 u4_buffer_size_sei_entropy; 1899 UWORD32 u4_dbf_entropy; 1900 1901 } s_pic_level_sei_info_t; 1902 /** 1903 ****************************************************************************** 1904 * @brief ME pass and Main enocde pass shared variables and buffers 1905 ****************************************************************************** 1906 */ 1907 typedef struct 1908 { 1909 /** 1910 * Buffer id 1911 */ 1912 WORD32 i4_buf_id; 1913 1914 /** 1915 * Flag will be set to 1 by frame processing thread after receiving flush 1916 * command from application 1917 */ 1918 WORD32 i4_end_flag; 1919 1920 /** current input pointer */ 1921 ihevce_lap_enc_buf_t *ps_curr_inp; 1922 1923 /** current inp buffer id */ 1924 WORD32 curr_inp_buf_id; 1925 1926 /** current input buffers from ME */ 1927 pre_enc_me_ctxt_t *ps_curr_inp_from_me_prms; 1928 1929 /** current inp buffer id from ME */ 1930 WORD32 curr_inp_from_me_buf_id; 1931 1932 /** current input buffers from L0 IPE */ 1933 pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_inp_from_l0_ipe_prms; 1934 1935 /** current inp buffer id from L0 IPE */ 1936 WORD32 curr_inp_from_l0_ipe_buf_id; 1937 1938 /** Slice header parameters */ 1939 slice_header_t s_slice_hdr; 1940 1941 /** current frm valid flag : 1942 * will be 1 if valid input was processed by frame proc thrd 1943 */ 1944 WORD32 i4_frm_proc_valid_flag; 1945 1946 /** 1947 * Array of reference picture list for ping instance 1948 * 2=> ref_pic_list0 and ref_pic_list1 1949 */ 1950 recon_pic_buf_t as_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2]; 1951 1952 /** 1953 * Array of reference picture list 1954 * 2=> ref_pic_list0 and ref_pic_list1 1955 */ 1956 recon_pic_buf_t *aps_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2]; 1957 1958 /** Job Queue Memory encode */ 1959 job_queue_t *ps_job_q_enc; 1960 1961 /** Array of Job Queue handles of enc group for ping and pong instance*/ 1962 job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES]; 1963 1964 /** Array of Job Queue handles of enc group for re-encode*/ 1965 job_queue_handle_t as_job_que_enc_hdls_reenc[NUM_ENC_JOBS_QUES]; 1966 /** frame level me_ctb_data_t buffer pointer 1967 */ 1968 me_ctb_data_t *ps_cur_ctb_me_data; 1969 1970 /** frame level cur_ctb_cu_tree_t buffer pointer for ME 1971 */ 1972 cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree; 1973 1974 /** Pointer to Dep. Mngr for CTBs processed in every row of a frame. 1975 * ME is producer, EncLoop is the consumer 1976 */ 1977 void *pv_dep_mngr_encloop_dep_me; 1978 1979 } me_enc_rdopt_ctxt_t; 1980 1981 typedef struct 1982 { 1983 UWORD32 u4_payload_type; 1984 UWORD32 u4_payload_length; 1985 UWORD8 *pu1_sei_payload; 1986 } sei_payload_t; 1987 1988 typedef struct 1989 { 1990 /** 1991 * Flag will be set to 1 by frame processing thread after receiving flush 1992 * command from application 1993 */ 1994 WORD32 i4_end_flag; 1995 1996 /** frame level ctb allocation for ctb after aligning to max cu size */ 1997 ctb_enc_loop_out_t *ps_frm_ctb_data; 1998 1999 /** frame level cu allocation for ctb after aligning to max cu size */ 2000 cu_enc_loop_out_t *ps_frm_cu_data; 2001 2002 /** frame level tu allocation for ctb after aligning to max cu size */ 2003 tu_enc_loop_out_t *ps_frm_tu_data; 2004 2005 /** frame level pu allocation for ctb after aligning to max cu size */ 2006 pu_t *ps_frm_pu_data; 2007 2008 /** frame level coeff allocation for ctb after aligning to max cu size */ 2009 void *pv_coeff_data; 2010 2011 /** Slice header parameters */ 2012 slice_header_t s_slice_hdr; 2013 2014 /** sps parameters activated by current slice */ 2015 sps_t *ps_sps; 2016 2017 /** pps parameters activated by current slice */ 2018 pps_t *ps_pps; 2019 2020 /** vps parameters activated by current slice */ 2021 vps_t *ps_vps; 2022 2023 /** vps parameters activated by current slice */ 2024 sei_params_t s_sei; 2025 2026 /* Flag to indicate if AUD NAL is present */ 2027 WORD8 i1_aud_present_flag; 2028 2029 /* Flag to indicate if EOS NAL is present */ 2030 WORD8 i1_eos_present_flag; 2031 2032 /** nal_type for the slice to be encoded */ 2033 WORD32 i4_slice_nal_type; 2034 2035 /** input time stamp in terms of ticks: lower 32 */ 2036 WORD32 i4_inp_timestamp_low; 2037 2038 /** input time stamp in terms of ticks: higher 32 */ 2039 WORD32 i4_inp_timestamp_high; 2040 2041 /** input frame ctxt of app to be retured in output buffer */ 2042 void *pv_app_frm_ctxt; 2043 2044 /** current frm valid flag : 2045 * will be 1 if valid input was processed by frame proc thrd 2046 */ 2047 WORD32 i4_frm_proc_valid_flag; 2048 2049 /** To support entropy sync the bitstream offset of each CTB row 2050 * is populated in this array any put in slice header in the end 2051 */ 2052 WORD32 ai4_entry_point_offset[MAX_NUM_CTB_ROWS_FRM]; 2053 2054 /** RDopt estimation of bytes generated based on which rc update happens 2055 * 2056 */ 2057 WORD32 i4_rdopt_bits_generated_estimate; 2058 2059 /* These params are passed from enc-threads to entropy thread for 2060 passing params needed for PSNR caclulation and encoding 2061 summary prints */ 2062 DOUBLE lf_luma_mse; 2063 DOUBLE lf_cb_mse; 2064 DOUBLE lf_cr_mse; 2065 2066 DOUBLE lf_luma_ssim; 2067 DOUBLE lf_cb_ssim; 2068 DOUBLE lf_cr_ssim; 2069 2070 WORD32 i4_qp; 2071 WORD32 i4_poc; 2072 WORD32 i4_display_num; 2073 WORD32 i4_pic_type; 2074 2075 /** I-only SCD */ 2076 WORD32 i4_is_I_scenecut; 2077 2078 WORD32 i4_is_non_I_scenecut; 2079 WORD32 i4_sub_pic_level_rc; 2080 2081 WORD32 ai4_frame_bits_estimated; 2082 s_pic_level_acc_info_t s_pic_level_info; 2083 2084 LWORD64 i8_buf_level_bitrate_change; 2085 2086 WORD32 i4_is_end_of_idr_gop; 2087 2088 sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD]; 2089 2090 UWORD32 u4_num_sei_payload; 2091 /* Flag used only in mres single output case to flush out one res and start with next */ 2092 WORD32 i4_out_flush_flag; 2093 2094 } frm_proc_ent_cod_ctxt_t; 2095 2096 /** 2097 ****************************************************************************** 2098 * @brief ME pass and Main enocde pass shared variables and buffers 2099 ****************************************************************************** 2100 */ 2101 typedef struct 2102 { 2103 /*BitRate ID*/ 2104 WORD32 i4_br_id; 2105 2106 /*Frame ID*/ 2107 WORD32 i4_frm_id; 2108 2109 /*Number of CTB, after ich data is populated*/ 2110 WORD32 i4_ctb_count_in_data; 2111 2112 /*Number of CTB, after ich scale is computed*/ 2113 WORD32 i4_ctb_count_out_scale; 2114 2115 /*Bits estimated for the frame */ 2116 /* For NON-I SCD max buf bits*/ 2117 LWORD64 i8_frame_bits_estimated; 2118 2119 /* Bits consumed till the nctb*/ 2120 LWORD64 i8_nctb_bits_consumed; 2121 2122 /* Bits consumed till the nctb*/ 2123 LWORD64 i8_acc_bits_consumed; 2124 2125 /*Frame level Best of Ipe and ME sad*/ 2126 LWORD64 i8_frame_l1_me_sad; 2127 2128 /*SAD accumalted till NCTB*/ 2129 LWORD64 i8_nctb_l1_me_sad; 2130 2131 /*Frame level IPE sad*/ 2132 LWORD64 i8_frame_l1_ipe_sad; 2133 2134 /*SAD accumalted till NCTB*/ 2135 LWORD64 i8_nctb_l1_ipe_sad; 2136 2137 /*Frame level L0 IPE satd*/ 2138 LWORD64 i8_frame_l0_ipe_satd; 2139 2140 /*L0 SATD accumalted till NCTB*/ 2141 LWORD64 i8_nctb_l0_ipe_satd; 2142 2143 /*Frame level Activity factor acc at 8x8 level */ 2144 LWORD64 i8_frame_l1_activity_fact; 2145 2146 /*NCTB Activity factor acc at 8x8 level */ 2147 LWORD64 i8_nctb_l1_activity_fact; 2148 2149 /*L0 MPM bits accumalted till NCTB*/ 2150 LWORD64 i8_nctb_l0_mpm_bits; 2151 2152 /*Encoder hdr accumalted till NCTB*/ 2153 LWORD64 i8_nctb_hdr_bits_consumed; 2154 2155 } ihevce_sub_pic_rc_ctxt_t; 2156 2157 /** 2158 ****************************************************************************** 2159 * @brief Memoery manager context (stores the memory tables allcoated) 2160 ****************************************************************************** 2161 */ 2162 typedef struct 2163 { 2164 /** 2165 * Total number of memtabs (Modules and system) 2166 * during create time 2167 */ 2168 WORD32 i4_num_create_memtabs; 2169 2170 /** 2171 * Pointer to the mem tabs 2172 * of crate time 2173 */ 2174 iv_mem_rec_t *ps_create_memtab; 2175 2176 /** 2177 * Total number of memtabs Data and control Ques 2178 * during Ques create time 2179 */ 2180 WORD32 i4_num_q_memtabs; 2181 2182 /** 2183 * Pointer to the mem tabs 2184 * of crate time 2185 */ 2186 iv_mem_rec_t *ps_q_memtab; 2187 2188 } enc_mem_mngr_ctxt; 2189 2190 /** 2191 ****************************************************************************** 2192 * @brief Encoder Interafce Queues Context 2193 ****************************************************************************** 2194 */ 2195 typedef struct 2196 { 2197 /** Number of Queues at interface context level */ 2198 WORD32 i4_num_queues; 2199 2200 /** Array of Queues handle */ 2201 void *apv_q_hdl[IHEVCE_MAX_NUM_QUEUES]; 2202 2203 /** Mutex for encuring thread safety of the access of the queues */ 2204 void *pv_q_mutex_hdl; 2205 2206 } enc_q_ctxt_t; 2207 2208 /** 2209 ****************************************************************************** 2210 * @brief Module context of different modules in encoder 2211 ****************************************************************************** 2212 */ 2213 2214 typedef struct 2215 { 2216 /** Motion estimation context pointer */ 2217 void *pv_me_ctxt; 2218 /** Coarse Motion estimation context pointer */ 2219 void *pv_coarse_me_ctxt; 2220 2221 /** Intra Prediction context pointer */ 2222 void *pv_ipe_ctxt; 2223 2224 /** Encode Loop context pointer */ 2225 void *pv_enc_loop_ctxt; 2226 2227 /** Entropy Coding context pointer */ 2228 void *apv_ent_cod_ctxt[IHEVCE_MAX_NUM_BITRATES]; 2229 2230 /** Look Ahead Processing context pointer */ 2231 void *pv_lap_ctxt; 2232 /** Rate control context pointer */ 2233 void *apv_rc_ctxt[IHEVCE_MAX_NUM_BITRATES]; 2234 /** Decomposition pre intra context pointer */ 2235 void *pv_decomp_pre_intra_ctxt; 2236 2237 } module_ctxt_t; 2238 2239 /** 2240 ****************************************************************************** 2241 * @brief Threads semaphore handles 2242 ****************************************************************************** 2243 */ 2244 typedef struct 2245 { 2246 /** LAP semaphore handle */ 2247 void *pv_lap_sem_handle; 2248 2249 /** Encode frame Process semaphore handle */ 2250 void *pv_enc_frm_proc_sem_handle; 2251 2252 /** Pre Encode frame Process semaphore handle */ 2253 void *pv_pre_enc_frm_proc_sem_handle; 2254 2255 /** Entropy coding semaphore handle 2256 One semaphore for each entropy thread, i.e. for each bit-rate instance*/ 2257 void *apv_ent_cod_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2258 2259 /** 2260 * Semaphore handle corresponding to get free inp frame buff 2261 * function call from app if called in blocking mode 2262 */ 2263 void *pv_inp_data_sem_handle; 2264 2265 /** 2266 * Semaphore handle corresponding to get free inp control command buff 2267 * function call from app if called in blocking mode 2268 */ 2269 void *pv_inp_ctrl_sem_handle; 2270 2271 /** 2272 * Semaphore handle corresponding to get filled out bitstream buff 2273 * function call from app if called in blocking mode 2274 */ 2275 void *apv_out_strm_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2276 2277 /** 2278 * Semaphore handle corresponding to get filled out recon buff 2279 * function call from app if called in blocking mode 2280 */ 2281 void *apv_out_recon_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2282 2283 /** 2284 * Semaphore handle corresponding to get filled out control status buff 2285 * function call from app if called in blocking mode 2286 */ 2287 void *pv_out_ctrl_sem_handle; 2288 2289 /** 2290 * Semaphore handle corresponding to get filled out control status buff 2291 * function call from app if called in blocking mode 2292 */ 2293 void *pv_lap_inp_data_sem_hdl; 2294 2295 /** 2296 * Semaphore handle corresponding to get filled out control status buff 2297 * function call from app if called in blocking mode 2298 */ 2299 void *pv_preenc_inp_data_sem_hdl; 2300 2301 /** 2302 * Semaphore handle corresponding to Multi Res Single output case 2303 */ 2304 void *pv_ent_common_mres_sem_hdl; 2305 void *pv_out_common_mres_sem_hdl; 2306 2307 } thrd_que_sem_hdl_t; 2308 2309 /** 2310 ****************************************************************************** 2311 * @brief Frame level structure which has parameters about CTBs 2312 ****************************************************************************** 2313 */ 2314 typedef struct 2315 { 2316 /** CTB size of all CTB in a frame in pixels 2317 * this will be create time value, 2318 * run time change in this value is not supported 2319 */ 2320 WORD32 i4_ctb_size; 2321 2322 /** Minimum CU size of CTB in a frame in pixels 2323 * this will be create time value, 2324 * run time change in this value is not supported 2325 */ 2326 WORD32 i4_min_cu_size; 2327 2328 /** Worst case num CUs in CTB based on i4_ctb_size */ 2329 WORD32 i4_num_cus_in_ctb; 2330 2331 /** Worst case num PUs in CTB based on i4_ctb_size */ 2332 WORD32 i4_num_pus_in_ctb; 2333 2334 /** Worst case num TUs in CTB based on i4_ctb_size */ 2335 WORD32 i4_num_tus_in_ctb; 2336 2337 /** Number of CTBs in horizontal direction 2338 * this is based on run time source width and i4_ctb_size 2339 */ 2340 WORD32 i4_num_ctbs_horz; 2341 2342 /** Number of CTBs in vertical direction 2343 * this is based on run time source height and i4_ctb_size 2344 */ 2345 WORD32 i4_num_ctbs_vert; 2346 2347 /** MAX CUs in horizontal direction 2348 * this is based on run time source width, i4_ctb_size and i4_num_cus_in_ctb 2349 */ 2350 WORD32 i4_max_cus_in_row; 2351 2352 /** MAX PUs in horizontal direction 2353 * this is based on run time source width, i4_ctb_size and i4_num_pus_in_ctb 2354 */ 2355 WORD32 i4_max_pus_in_row; 2356 2357 /** MAX TUs in horizontal direction 2358 * this is based on run time source width, i4_ctb_size and i4_num_tus_in_ctb 2359 */ 2360 WORD32 i4_max_tus_in_row; 2361 2362 /** 2363 * CU aligned picture width (currently aligned to MAX CU size) 2364 * should be modified to be aligned to MIN CU size 2365 */ 2366 2367 WORD32 i4_cu_aligned_pic_wd; 2368 2369 /** 2370 * CU aligned picture height (currently aligned to MAX CU size) 2371 * should be modified to be aligned to MIN CU size 2372 */ 2373 2374 WORD32 i4_cu_aligned_pic_ht; 2375 2376 /* Pointer to a frame level memory, 2377 Stride is = 1 + (num ctbs in a ctb-row) + 1 2378 Hieght is = 1 + (num ctbs in a ctb-col) 2379 Contains tile-id of each ctb */ 2380 WORD32 *pi4_tile_id_map; 2381 2382 /* stride in units of ctb */ 2383 WORD32 i4_tile_id_ctb_map_stride; 2384 2385 } frm_ctb_ctxt_t; 2386 2387 /** 2388 ****************************************************************************** 2389 * @brief ME Job Queue desc 2390 ****************************************************************************** 2391 */ 2392 typedef struct 2393 { 2394 /** Number of output dependencies which need to be set after 2395 * current job is complete, 2396 * should be less than or equal to MAX_OUT_DEP defined in 2397 * ihevce_multi_thrd_structs.h 2398 */ 2399 WORD32 i4_num_output_dep; 2400 2401 /** Array of offsets from the start of output dependent layer's Job Ques 2402 * which are dependent on current Job to be complete 2403 */ 2404 WORD32 ai4_out_dep_unit_off[MAX_OUT_DEP]; 2405 2406 /** Number of input dependencies to be resolved for current job to start 2407 * these many jobs in lower layer should be complete to 2408 * start the current JOB 2409 */ 2410 WORD32 i4_num_inp_dep; 2411 2412 } multi_thrd_me_job_q_prms_t; 2413 2414 /** 2415 * @brief structure in which recon data 2416 * and related parameters are sent from Encoder 2417 */ 2418 typedef struct 2419 { 2420 /** Kept for maintaining backwards compatibility in future */ 2421 WORD32 i4_size; 2422 2423 /** Buffer id for the current buffer */ 2424 WORD32 i4_buf_id; 2425 2426 /** POC of the current buffer */ 2427 WORD32 i4_poc; 2428 2429 /** End flag to communicate this is last frame output from encoder */ 2430 WORD32 i4_end_flag; 2431 2432 /** End flag to communicate encoder that this is the last buffer from application 2433 1 - Last buf, 0 - Not last buffer. No other values are supported. 2434 Application has to set the appropriate value before queing in encoder queue */ 2435 2436 WORD32 i4_is_last_buf; 2437 2438 /** Recon luma buffer pointer */ 2439 void *pv_y_buf; 2440 2441 /** Recon cb buffer pointer */ 2442 void *pv_cb_buf; 2443 2444 /** Recon cr buffer pointer */ 2445 void *pv_cr_buf; 2446 2447 /** Luma size **/ 2448 WORD32 i4_y_pixels; 2449 2450 /** Chroma size **/ 2451 WORD32 i4_uv_pixels; 2452 2453 } iv_enc_recon_data_buffs_t; 2454 2455 /** 2456 ****************************************************************************** 2457 * @brief Multi Thread context structure 2458 ****************************************************************************** 2459 */ 2460 typedef struct 2461 { 2462 /* Flag to indicate to enc and pre-enc thrds that app has sent force end cmd*/ 2463 WORD32 i4_force_end_flag; 2464 2465 /** Force all active threads flag 2466 * This flag will be set to 1 if all Number of cores givento the encoder 2467 * is less than or Equal to MAX_NUM_CORES_SEQ_EXEC. In this mode 2468 * All pre enc threads and enc threads will run of the same cores with 2469 * time sharing ar frame level 2470 */ 2471 WORD32 i4_all_thrds_active_flag; 2472 2473 /** Flag to indicate that core manager has been configured to enable 2474 * sequential execution 2475 */ 2476 WORD32 i4_seq_mode_enabled_flag; 2477 /*-----------------------------------------------------------------------*/ 2478 /*--------- Params related to encode group -----------------------------*/ 2479 /*-----------------------------------------------------------------------*/ 2480 2481 /** Number of processing threads created runtime in encode group */ 2482 WORD32 i4_num_enc_proc_thrds; 2483 2484 /** Number of processing threads active for a given frame 2485 * This value will be monitored at frame level, so as to 2486 * have provsion for increasing / decreasing threads 2487 * based on Load balance b/w stage in encoder 2488 */ 2489 WORD32 i4_num_active_enc_thrds; 2490 /** Job Queue Memory encode */ 2491 job_queue_t *ps_job_q_enc[PING_PONG_BUF]; 2492 2493 /** Array of Job Queue handles of enc group for ping and pong instance*/ 2494 job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES][PING_PONG_BUF]; 2495 2496 /** Mutex for ensuring thread safety of the access of Job queues in encode group */ 2497 void *pv_job_q_mutex_hdl_enc_grp_me; 2498 2499 /** Mutex for ensuring thread safety of the access of Job queues in encode group */ 2500 void *pv_job_q_mutex_hdl_enc_grp_enc_loop; 2501 2502 /** Array of Semaphore handles (for each frame processing threads ) */ 2503 void *apv_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC]; 2504 2505 /** Array for communcating start processing from master thread to indivisual 2506 * threads in Enocde group of threads 2507 * till 0 : wait 2508 * 1 : start 2509 * After reading the start signal, corresponding thread hould reset it to 0 2510 */ 2511 WORD32 ai4_enc_frm_proc_start[MAX_NUM_FRM_PROC_THRDS_ENC]; 2512 2513 /** Note: For Enc loop pass similar memory is used whihc is part of frm_proc_ent_cod_ctxt_t 2514 * for Row level Sync hence not explicitly declared here 2515 */ 2516 2517 /** Array for ME to export the Job que dependency for all layers */ 2518 multi_thrd_me_job_q_prms_t as_me_job_q_prms[MAX_NUM_HME_LAYERS][MAX_NUM_VERT_UNITS_FRM]; 2519 2520 /* pointer to the mutex handle*/ 2521 void *apv_mutex_handle[MAX_NUM_ME_PARALLEL]; 2522 2523 /* pointer to the mutex handle for frame init*/ 2524 void *apv_mutex_handle_me_end[MAX_NUM_ME_PARALLEL]; 2525 2526 /* pointer to the mutex handle for frame init*/ 2527 void *apv_mutex_handle_frame_init[MAX_NUM_ENC_LOOP_PARALLEL]; 2528 2529 /*pointer to the mutex handle*/ 2530 void *apv_post_enc_mutex_handle[MAX_NUM_ENC_LOOP_PARALLEL]; 2531 2532 /* Flag to indicate that master has done ME init*/ 2533 WORD32 ai4_me_master_done_flag[MAX_NUM_ME_PARALLEL]; 2534 2535 /* Counter to keep track of me num of thrds exiting critical section*/ 2536 WORD32 me_num_thrds_exited[MAX_NUM_ME_PARALLEL]; 2537 2538 /* Flag to indicate that master has done the frame init*/ 2539 WORD32 enc_master_done_frame_init[MAX_NUM_ENC_LOOP_PARALLEL]; 2540 2541 /* Counter to keep track of num of thrds exiting critical section*/ 2542 WORD32 num_thrds_exited[MAX_NUM_ENC_LOOP_PARALLEL]; 2543 2544 /* Counter to keep track of num of thrds exiting critical section for re-encode*/ 2545 WORD32 num_thrds_exited_for_reenc; 2546 2547 /* Array to store the curr qp for ping and pong instance*/ 2548 WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2549 2550 /* Pointers to store output buffers for ping and pong instance*/ 2551 frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_grp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2552 2553 /* Pointer to store input buffers for me*/ 2554 pre_enc_me_ctxt_t *aps_cur_inp_me_prms[MAX_NUM_ME_PARALLEL]; 2555 2556 /*pointers to store output buffers from me */ 2557 me_enc_rdopt_ctxt_t *aps_cur_out_me_prms[NUM_ME_ENC_BUFS]; 2558 2559 /*pointers to store input buffers to enc-rdopt */ 2560 me_enc_rdopt_ctxt_t *aps_cur_inp_enc_prms[NUM_ME_ENC_BUFS]; 2561 2562 /*Shared memory for Sub Pic rc */ 2563 /*Qscale calulated by sub pic rc bit control for Intra Pic*/ 2564 WORD32 ai4_curr_qp_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2565 2566 /*Header bits error by sub pic rc bit control*/ 2567 float af_acc_hdr_bits_scale_err[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2568 2569 /*Accumalated ME SAD for NCTB*/ 2570 LWORD64 ai8_nctb_me_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2571 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2572 2573 /*Accumalated IPE SAD for NCTB*/ 2574 LWORD64 ai8_nctb_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2575 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2576 2577 /*Accumalated L0 IPE SAD for NCTB*/ 2578 LWORD64 ai8_nctb_l0_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2579 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2580 2581 /*Accumalated Activity Factor for NCTB*/ 2582 LWORD64 ai8_nctb_act_factor[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2583 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2584 2585 /*Accumalated Ctb counter across all threads*/ 2586 WORD32 ai4_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2587 2588 /*Bits threshold reached for across all threads*/ 2589 WORD32 ai4_threshold_reached[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2590 2591 /*To hold the Previous In-frame RC chunk QP*/ 2592 WORD32 ai4_prev_chunk_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2593 2594 /*Accumalated Ctb counter across all threads*/ 2595 WORD32 ai4_acc_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2596 2597 /*Flag to check if thread is initialized */ 2598 WORD32 ai4_thrd_id_valid_flag[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2599 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2600 2601 /*Accumalated Ctb counter across all threads*/ 2602 //WORD32 ai4_acc_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES][MAX_NUM_FRM_PROC_THRDS_ENC]; 2603 2604 /*Accumalated bits consumed for nctbs across all threads*/ 2605 LWORD64 ai8_nctb_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2606 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2607 2608 /*Accumalated hdr bits consumed for nctbs across all threads*/ 2609 LWORD64 ai8_nctb_hdr_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2610 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2611 2612 /*Accumalated l0 mpm bits consumed for nctbs across all threads*/ 2613 LWORD64 ai8_nctb_mpm_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2614 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2615 2616 /*Accumalated bits consumed for total ctbs across all threads*/ 2617 LWORD64 ai8_acc_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2618 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2619 2620 /*Accumalated bits consumed for total ctbs across all threads*/ 2621 LWORD64 ai8_acc_bits_mul_qs_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2622 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2623 2624 /*Qscale calulated by sub pic rc bit control */ 2625 WORD32 ai4_curr_qp_acc[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2626 /* End of Sub pic rc variables */ 2627 2628 /* Pointers to store input (only L0 IPE)*/ 2629 pre_enc_L0_ipe_encloop_ctxt_t *aps_cur_L0_ipe_inp_prms[MAX_NUM_ME_PARALLEL]; 2630 2631 /** Slice header parameters */ 2632 /** temporarily store the slice header parameters in enc-loop thread 2633 which will be copied to curr_out when buffer is aquired */ 2634 //slice_header_t as_slice_hdr[PING_PONG_BUF]; 2635 2636 /* Array to store input buffer ids for ping and pong instances*/ 2637 //WORD32 in_buf_id[PING_PONG_BUF]; 2638 2639 /* Array tp store L0 IPE input buf ids*/ 2640 WORD32 ai4_in_frm_l0_ipe_id[MAX_NUM_ME_PARALLEL]; 2641 2642 /* Array to store output buffer ids for ping and pong instances*/ 2643 WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF]; 2644 2645 /* Variable to indicate ping and pong instance for each thread*/ 2646 WORD32 ping_pong[MAX_NUM_FRM_PROC_THRDS_ENC]; 2647 2648 /* Array of pointers to store the recon buf pointers*/ 2649 iv_enc_recon_data_buffs_t 2650 *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF]; 2651 2652 /* Array of pointers to frame recon for ping and pong instances*/ 2653 recon_pic_buf_t *ps_frm_recon[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES]; 2654 2655 /* Array of recon buffer ids for ping and pong instance*/ 2656 WORD32 recon_buf_id[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF]; 2657 2658 /* End flag to signal end of all the frames in me*/ 2659 WORD32 me_end_flag; 2660 2661 /* End flag to signal end of all the frames in enc*/ 2662 WORD32 enc_end_flag; 2663 2664 /* Counter to keep track of num thrds done*/ 2665 WORD32 num_thrds_done; 2666 2667 /* Flags to keep track of dumped ping pong recon buffer*/ 2668 WORD32 is_recon_dumped[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF]; 2669 2670 /* Flags to keep track of dumped ping pong output buffer*/ 2671 WORD32 is_out_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF]; 2672 2673 /* flag to produce output buffer by the thread who ever is finishing 2674 enc-loop processing first, so that the entropy thread can start processing */ 2675 WORD32 ai4_produce_outbuf[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2676 2677 /* Flags to keep track of dumped ping pong input buffer*/ 2678 WORD32 is_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL]; 2679 2680 /* Flags to keep track of dumped ping pong L0 IPE to enc buffer*/ 2681 WORD32 is_L0_ipe_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL]; 2682 2683 /** Dependency manager for checking whether prev. EncLoop done before 2684 current frame EncLoop starts */ 2685 void *apv_dep_mngr_prev_frame_done[MAX_NUM_ENC_LOOP_PARALLEL]; 2686 2687 /** Dependency manager for checking whether prev. EncLoop done before 2688 re-encode of the current frame */ 2689 void *pv_dep_mngr_prev_frame_enc_done_for_reenc; 2690 2691 /** Dependency manager for checking whether prev. me done before 2692 current frame me starts */ 2693 void *apv_dep_mngr_prev_frame_me_done[MAX_NUM_ME_PARALLEL]; 2694 2695 /** ME coarsest layer JOB queue type */ 2696 WORD32 i4_me_coarsest_lyr_type; 2697 2698 /** number of encloop frames running in parallel */ 2699 WORD32 i4_num_enc_loop_frm_pllel; 2700 2701 /** number of me frames running in parallel */ 2702 WORD32 i4_num_me_frm_pllel; 2703 2704 /*-----------------------------------------------------------------------*/ 2705 /*--------- Params related to pre-enc stage -----------------------------*/ 2706 /*-----------------------------------------------------------------------*/ 2707 2708 /** Number of processing threads created runtime in pre encode group */ 2709 WORD32 i4_num_pre_enc_proc_thrds; 2710 2711 /** Number of processing threads active for a given frame 2712 * This value will be monitored at frame level, so as to 2713 * have provsion for increasing / decreasing threads 2714 * based on Load balance b/w stage in encoder 2715 */ 2716 WORD32 i4_num_active_pre_enc_thrds; 2717 /** number of threads that have done processing the current frame 2718 Use to find out the last thread that is coming out of pre-enc processing 2719 so that the last thread can do de-init of pre-enc stage */ 2720 WORD32 ai4_num_thrds_processed_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2721 2722 /** number of threads that have done processing the current frame 2723 Use to find out the first thread and last inoder to get qp query. As the query 2724 is not read only , the quer should be done only once by thread that comes first 2725 and other threads should get same value*/ 2726 WORD32 ai4_num_thrds_processed_L0_ipe_qp_init[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2727 2728 /** number of threads that have done proessing decomp_intra 2729 Used to find out the last thread that is coming out so that 2730 the last thread can set flag for decomp_pre_intra_finish */ 2731 WORD32 ai4_num_thrds_processed_decomp[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2732 2733 /** number of threads that have done proessing coarse_me 2734 Used to find out the last thread that is coming out so that 2735 the last thread can set flag for coarse_me_finish */ 2736 WORD32 ai4_num_thrds_processed_coarse_me[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2737 2738 /*Flag to indicate if current instance (frame)'s Decomp_pre_intra and Coarse_ME is done. 2739 Used to check if previous frame is done proecessing decom_pre_intra and coarse_me */ 2740 WORD32 ai4_decomp_coarse_me_complete_flag[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2741 2742 /** Dependency manager for checking whether prev. frame decomp_intra 2743 done before current frame decomp_intra starts */ 2744 void *pv_dep_mngr_prev_frame_pre_enc_l1; 2745 2746 /** Dependency manager for checking whether prev. frame L0 IPE done before 2747 current frame L0 IPE starts */ 2748 void *pv_dep_mngr_prev_frame_pre_enc_l0; 2749 2750 /** Dependency manager for checking whether prev. frame coarse_me done before 2751 current frame coarse_me starts */ 2752 void *pv_dep_mngr_prev_frame_pre_enc_coarse_me; 2753 2754 /** flag to indicate if pre_enc_init is done for current frame */ 2755 WORD32 ai4_pre_enc_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2756 2757 /** flag to indicate if pre_enc_hme_init is done for current frame */ 2758 WORD32 ai4_pre_enc_hme_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2759 2760 /** flag to indicate if pre_enc_deinit is done for current frame */ 2761 WORD32 ai4_pre_enc_deinit_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2762 2763 /** Flag to indicate the end of processing when all the frames are done processing */ 2764 WORD32 ai4_end_flag_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2765 2766 /** Flag to indicate the control blocking mode indicating input command to pre-enc 2767 group should be blocking or unblocking */ 2768 WORD32 i4_ctrl_blocking_mode; 2769 2770 /** Current input pointer */ 2771 ihevce_lap_enc_buf_t *aps_curr_inp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2772 2773 WORD32 i4_last_inp_buf; 2774 2775 /* buffer id for input buffer */ 2776 WORD32 ai4_in_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2777 2778 /** Current output pointer */ 2779 pre_enc_me_ctxt_t *aps_curr_out_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2780 2781 /*Current L0 IPE to enc output pointer */ 2782 pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc; 2783 2784 /** buffer id for output buffer */ 2785 WORD32 ai4_out_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2786 2787 /** buffer id for L0 IPE enc buffer*/ 2788 WORD32 i4_L0_IPE_out_buf_id; 2789 2790 /** current frame recon pointer */ 2791 recon_pic_buf_t *aps_frm_recon_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2792 2793 /** Current picture Qp */ 2794 WORD32 ai4_cur_frame_qp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2795 2796 /** Decomp layer buffers indicies */ 2797 WORD32 ai4_decomp_lyr_buf_idx[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2798 2799 /*since it is guranteed that cur frame ipe will not start unless prev frame ipe is completly done, 2800 an array of MAX_PRE_ENC_STAGGER might not be required*/ 2801 WORD32 i4_qp_update_l0_ipe; 2802 2803 /** Current picture encoded is the last picture to be encoded flag */ 2804 WORD32 i4_last_pic_flag; 2805 2806 /** Mutex for ensuring thread safety of the access of Job queues in decomp stage */ 2807 void *pv_job_q_mutex_hdl_pre_enc_decomp; 2808 2809 /** Mutex for ensuring thread safety of the access of Job queues in HME group */ 2810 void *pv_job_q_mutex_hdl_pre_enc_hme; 2811 2812 /** Mutex for ensuring thread safety of the access of Job queues in l0 ipe stage */ 2813 void *pv_job_q_mutex_hdl_pre_enc_l0ipe; 2814 2815 /** mutex handle for pre-enc init */ 2816 void *pv_mutex_hdl_pre_enc_init; 2817 2818 /** mutex handle for pre-enc decomp deinit */ 2819 void *pv_mutex_hdl_pre_enc_decomp_deinit; 2820 2821 /** mutex handle for pre enc hme init */ 2822 void *pv_mutex_hdl_pre_enc_hme_init; 2823 2824 /** mutex handle for pre-enc hme deinit */ 2825 void *pv_mutex_hdl_pre_enc_hme_deinit; 2826 2827 /*qp qurey before l0 ipe is done by multiple frame*/ 2828 /** mutex handle for L0 ipe(pre-enc init)*/ 2829 void *pv_mutex_hdl_l0_ipe_init; 2830 2831 /** mutex handle for pre-enc deinit */ 2832 void *pv_mutex_hdl_pre_enc_deinit; 2833 2834 /** Array of Semaphore handles (for each frame processing threads ) */ 2835 void *apv_pre_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC]; 2836 /** array which will tell the number of CTB processed in each row, 2837 * used for Row level sync in IPE pass 2838 */ 2839 WORD32 ai4_ctbs_in_row_proc_ipe_pass[MAX_NUM_CTB_ROWS_FRM]; 2840 2841 /** Job Queue Memory pre encode */ 2842 job_queue_t *aps_job_q_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2843 2844 /** Array of Job Queue handles enc group */ 2845 job_queue_handle_t as_job_que_preenc_hdls[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2846 [NUM_PRE_ENC_JOBS_QUES]; 2847 2848 /* accumulate intra sad across all thread to get qp before L0 IPE*/ 2849 WORD32 ai4_intra_satd_acc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2850 [MAX_NUM_FRM_PROC_THRDS_PRE_ENC]; 2851 2852 WORD32 i4_delay_pre_me_btw_l0_ipe; 2853 2854 /*** This variable has the maximum delay between hme and l0ipe ***/ 2855 /*** This is used for wrapping around L0IPE index ***/ 2856 WORD32 i4_max_delay_pre_me_btw_l0_ipe; 2857 2858 /* This is to register the handles of Dep Mngr b/w EncLoop and ME */ 2859 /* This is used to delete the Mngr at the end */ 2860 void *apv_dep_mngr_encloop_dep_me[NUM_ME_ENC_BUFS]; 2861 /*flag to track buffer in me/enc que is produced or not*/ 2862 WORD32 ai4_me_enc_buff_prod_flag[NUM_ME_ENC_BUFS]; 2863 2864 /*out buf que id for me */ 2865 WORD32 ai4_me_out_buf_id[NUM_ME_ENC_BUFS]; 2866 2867 /*in buf que id for enc from me*/ 2868 WORD32 i4_enc_in_buf_id[NUM_ME_ENC_BUFS]; 2869 2870 /* This is used to tell whether the free of recon buffers are done or not */ 2871 WORD32 i4_is_recon_free_done; 2872 2873 /* index for DVSR population */ 2874 WORD32 i4_idx_dvsr_p; 2875 WORD32 aai4_l1_pre_intra_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2876 [(HEVCE_MAX_HEIGHT >> 1) / 8]; 2877 2878 WORD32 i4_rc_l0_qp; 2879 2880 /* Used for mres single out cases. Checks whether a particular resolution is active or passive */ 2881 /* Only one resolution should be active for mres_single_out case */ 2882 WORD32 *pi4_active_res_id; 2883 2884 /** 2885 * Sub Pic bit control mutex lock handle 2886 */ 2887 void *pv_sub_pic_rc_mutex_lock_hdl; 2888 2889 void *pv_sub_pic_rc_for_qp_update_mutex_lock_hdl; 2890 2891 WORD32 i4_encode; 2892 WORD32 i4_in_frame_rc_enabled; 2893 WORD32 i4_num_re_enc; 2894 2895 } multi_thrd_ctxt_t; 2896 2897 /** 2898 * @brief Structure to describe tile params 2899 */ 2900 typedef struct 2901 { 2902 /* flag to indicate tile encoding enabled/disabled */ 2903 WORD32 i4_tiles_enabled_flag; 2904 2905 /* flag to indicate unifrom spacing of tiles */ 2906 WORD32 i4_uniform_spacing_flag; 2907 2908 /* num tiles in a tile-row. num tiles in tile-col */ 2909 WORD32 i4_num_tile_cols; 2910 WORD32 i4_num_tile_rows; 2911 2912 /* Curr tile width and height*/ 2913 WORD32 i4_curr_tile_width; 2914 WORD32 i4_curr_tile_height; 2915 2916 /* Curr tile width and heignt in CTB units*/ 2917 WORD32 i4_curr_tile_wd_in_ctb_unit; 2918 WORD32 i4_curr_tile_ht_in_ctb_unit; 2919 2920 /* frame resolution */ 2921 //WORD32 i4_frame_width; /* encode-width */ 2922 //WORD32 i4_frame_height; /* encode-height */ 2923 2924 /* total num of tiles "in frame" */ 2925 WORD32 i4_num_tiles; 2926 2927 /* Curr tile id. Assigned by raster scan order in a frame */ 2928 WORD32 i4_curr_tile_id; 2929 2930 /* x-pos of first ctb of the slice in ctb */ 2931 /* y-pos of first ctb of the slice in ctb */ 2932 WORD32 i4_first_ctb_x; 2933 WORD32 i4_first_ctb_y; 2934 2935 /* x-pos of first ctb of the slice in samples */ 2936 /* y-pos of first ctb of the slice in samples */ 2937 WORD32 i4_first_sample_x; 2938 WORD32 i4_first_sample_y; 2939 2940 } ihevce_tile_params_t; 2941 2942 /** 2943 ****************************************************************************** 2944 * @brief Encoder context structure 2945 ****************************************************************************** 2946 */ 2947 2948 typedef struct 2949 { 2950 /** 2951 * vps parameters 2952 */ 2953 vps_t as_vps[IHEVCE_MAX_NUM_BITRATES]; 2954 2955 /** 2956 * sps parameters 2957 */ 2958 sps_t as_sps[IHEVCE_MAX_NUM_BITRATES]; 2959 2960 /** 2961 * pps parameters 2962 * Required for each bitrate separately, mainly because 2963 * init qp etc parameters needs to be different for each instance 2964 */ 2965 pps_t as_pps[IHEVCE_MAX_NUM_BITRATES]; 2966 2967 /** 2968 * Rate control mutex lock handle 2969 */ 2970 void *pv_rc_mutex_lock_hdl; 2971 2972 /** frame level cu analyse buffer pointer for ME 2973 * ME will get ps_ctb_analyse structure populated with ps_cu pointers 2974 * pointing to ps_cu_analyse buffer from IPE. 2975 */ 2976 //cu_analyse_t *ps_cu_analyse_inter[PING_PONG_BUF]; 2977 2978 /** 2979 * CTB frame context between encoder (producer) and entropy (consumer) 2980 */ 2981 enc_q_ctxt_t s_enc_ques; 2982 2983 /** 2984 * Encoder memory manager ctxt 2985 */ 2986 enc_mem_mngr_ctxt s_mem_mngr; 2987 2988 /** 2989 * Semaphores of all the threads created in HLE 2990 * and Que handle for buffers b/w frame process and entropy 2991 */ 2992 thrd_que_sem_hdl_t s_thrd_sem_ctxt; 2993 2994 /** 2995 * Reference /recon buffer Que pointer 2996 */ 2997 recon_pic_buf_t **pps_recon_buf_q[IHEVCE_MAX_NUM_BITRATES]; 2998 2999 /** 3000 * Number of buffers in Recon buffer queue 3001 */ 3002 WORD32 ai4_num_buf_recon_q[IHEVCE_MAX_NUM_BITRATES]; 3003 3004 /** 3005 * Reference / recon buffer Que pointer for Pre Encode group 3006 * this will be just a container and no buffers will be allcoated 3007 */ 3008 recon_pic_buf_t **pps_pre_enc_recon_buf_q; 3009 3010 /** 3011 * Number of buffers in Recon buffer queue 3012 */ 3013 WORD32 i4_pre_enc_num_buf_recon_q; 3014 3015 /** 3016 * frame level CTB parameters and worst PU CU and TU in a CTB row 3017 */ 3018 frm_ctb_ctxt_t s_frm_ctb_prms; 3019 3020 /* 3021 * Moudle ctxt pointers of all modules 3022 */ 3023 module_ctxt_t s_module_ctxt; 3024 3025 /* 3026 * LAP static parameters 3027 */ 3028 ihevce_lap_static_params_t s_lap_stat_prms; 3029 3030 /* 3031 * Run time dynamic source params 3032 */ 3033 3034 ihevce_src_params_t s_runtime_src_prms; 3035 3036 /* 3037 *Target params 3038 */ 3039 ihevce_tgt_params_t s_runtime_tgt_params; 3040 3041 /* 3042 * Run time dynamic coding params 3043 */ 3044 ihevce_coding_params_t s_runtime_coding_prms; 3045 3046 /** 3047 * Pointer to static config params 3048 */ 3049 ihevce_static_cfg_params_t *ps_stat_prms; 3050 3051 /** 3052 * the following structure members used for copying recon buf info 3053 * in case of duplicate pics 3054 */ 3055 3056 /** 3057 * Array of reference picture list for pre enc group 3058 * Separate list for ping_pong instnaces 3059 * 2=> ref_pic_list0 and ref_pic_list1 3060 */ 3061 recon_pic_buf_t as_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2] 3062 [HEVCE_MAX_REF_PICS * 2]; 3063 3064 /** 3065 * Array of reference picture list for pre enc group 3066 * Separate list for ping_pong instnaces 3067 * 2=> ref_pic_list0 and ref_pic_list1 3068 */ 3069 recon_pic_buf_t *aps_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2] 3070 [HEVCE_MAX_REF_PICS * 2]; 3071 3072 /** 3073 * Number of input frames per input queue 3074 */ 3075 WORD32 i4_num_input_buf_per_queue; 3076 3077 /** 3078 * poc of the Clean Random Access(CRA)Ipic 3079 */ 3080 WORD32 i4_cra_poc; 3081 3082 /** Number of ref pics in list 0 for any given frame */ 3083 WORD32 i4_num_ref_l0; 3084 3085 /** Number of ref pics in list 1 for any given frame */ 3086 WORD32 i4_num_ref_l1; 3087 3088 /** Number of active ref pics in list 0 for cur frame */ 3089 WORD32 i4_num_ref_l0_active; 3090 3091 /** Number of active ref pics in list 1 for cur frame */ 3092 WORD32 i4_num_ref_l1_active; 3093 3094 /** Number of ref pics in list 0 for any given frame pre encode stage */ 3095 WORD32 i4_pre_enc_num_ref_l0; 3096 3097 /** Number of ref pics in list 1 for any given frame pre encode stage */ 3098 WORD32 i4_pre_enc_num_ref_l1; 3099 3100 /** Number of active ref pics in list 0 for cur frame pre encode stage */ 3101 WORD32 i4_pre_enc_num_ref_l0_active; 3102 3103 /** Number of active ref pics in list 1 for cur frame pre encode stage */ 3104 WORD32 i4_pre_enc_num_ref_l1_active; 3105 3106 /** 3107 * working mem to be used for frm level activities 3108 * One example is interplation at frame level. This requires memory 3109 * of (max width + 16) * (max_height + 7 + 16 ) * 2 bytes. 3110 * This is so since we generate interp output for max_width + 16 x 3111 * max_height + 16, and then the intermediate output is 16 bit and 3112 * is max_height + 16 + 7 rows 3113 */ 3114 UWORD8 *pu1_frm_lvl_wkg_mem; 3115 3116 /** 3117 * Multi thread processing context 3118 * This memory contains the variables and pointers shared across threads 3119 * in enc-group and pre-enc-group 3120 */ 3121 multi_thrd_ctxt_t s_multi_thrd; 3122 3123 /** I/O Queues created status */ 3124 WORD32 i4_io_queues_created; 3125 3126 WORD32 i4_end_flag; 3127 3128 /** number of bit-rate instances running */ 3129 WORD32 i4_num_bitrates; 3130 3131 /** number of enc frames running in parallel */ 3132 WORD32 i4_num_enc_loop_frm_pllel; 3133 3134 /*ref bitrate id*/ 3135 WORD32 i4_ref_mbr_id; 3136 3137 /* Flag to indicate app, that end of processing has reached */ 3138 WORD32 i4_frame_limit_reached; 3139 3140 /*Structure to store the function selector 3141 * pointers for common and encoder */ 3142 func_selector_t s_func_selector; 3143 3144 /*ref resolution id*/ 3145 WORD32 i4_resolution_id; 3146 3147 /*hle context*/ 3148 void *pv_hle_ctxt; 3149 3150 rc_quant_t s_rc_quant; 3151 /*ME cost of P pic stored for the next ref B pic*/ 3152 //LWORD64 i8_acc_me_cost_of_p_pic_for_b_pic[2]; 3153 3154 UWORD32 u4_cur_pic_encode_cnt; 3155 UWORD32 u4_cur_pic_encode_cnt_dbp; 3156 /*past 2 p pics high complexity status*/ 3157 WORD32 ai4_is_past_pic_complex[2]; 3158 3159 WORD32 i4_is_I_reset_done; 3160 WORD32 i4_past_RC_reset_count; 3161 3162 WORD32 i4_future_RC_reset; 3163 3164 WORD32 i4_past_RC_scd_reset_count; 3165 3166 WORD32 i4_future_RC_scd_reset; 3167 WORD32 i4_poc_reset_values; 3168 3169 /*Place holder to store the length of LAP in first pass*/ 3170 /** Number of frames to look-ahead for RC by - 3171 * counts 2 fields as one frame for interlaced 3172 */ 3173 WORD32 i4_look_ahead_frames_in_first_pass; 3174 3175 WORD32 ai4_mod_factor_derived_by_variance[2]; 3176 float f_strength; 3177 3178 /*for B frames use the avg activity 3179 from the layer 0 (I or P) which is the average over 3180 Lap2 window*/ 3181 LWORD64 ai8_lap2_8x8_avg_act_from_T0[2]; 3182 3183 LWORD64 ai8_lap2_16x16_avg_act_from_T0[3]; 3184 3185 LWORD64 ai8_lap2_32x32_avg_act_from_T0[3]; 3186 3187 /*for B frames use the log of avg activity 3188 from the layer 0 (I or P) which is the average over 3189 Lap2 window*/ 3190 long double ald_lap2_8x8_log_avg_act_from_T0[2]; 3191 3192 long double ald_lap2_16x16_log_avg_act_from_T0[3]; 3193 3194 long double ald_lap2_32x32_log_avg_act_from_T0[3]; 3195 3196 ihevce_tile_params_t *ps_tile_params_base; 3197 3198 WORD32 ai4_column_width_array[MAX_TILE_COLUMNS]; 3199 3200 WORD32 ai4_row_height_array[MAX_TILE_ROWS]; 3201 3202 /* Architecture */ 3203 IV_ARCH_T e_arch_type; 3204 3205 UWORD8 u1_is_popcnt_available; 3206 3207 WORD32 i4_active_scene_num; 3208 3209 WORD32 i4_max_fr_enc_loop_parallel_rc; 3210 WORD32 ai4_rc_query[IHEVCE_MAX_NUM_BITRATES]; 3211 WORD32 i4_active_enc_frame_id; 3212 3213 /** 3214 * LAP interface ctxt pointer 3215 */ 3216 void *pv_lap_interface_ctxt; 3217 3218 /* If enable, enables blu ray compatibility of op*/ 3219 WORD32 i4_blu_ray_spec; 3220 3221 } enc_ctxt_t; 3222 3223 /** 3224 ****************************************************************************** 3225 * @brief This struct contains the inter CTB params needed for the decision 3226 * of the best inter CU results 3227 ****************************************************************************** 3228 */ 3229 typedef struct 3230 { 3231 hme_pred_buf_mngr_t s_pred_buf_mngr; 3232 3233 /** X and y offset of ctb w.r.t. start of pic */ 3234 WORD32 i4_ctb_x_off; 3235 WORD32 i4_ctb_y_off; 3236 3237 /** 3238 * Pred buffer ptr, updated inside subpel refinement process. This 3239 * location passed to the leaf fxn for copying the winner pred buf 3240 */ 3241 UWORD8 **ppu1_pred; 3242 3243 /** Working mem passed to leaf fxns */ 3244 UWORD8 *pu1_wkg_mem; 3245 3246 /** prediction buffer stride fo rleaf fxns to copy the pred winner buf */ 3247 WORD32 i4_pred_stride; 3248 3249 /** Stride of input buf, updated inside subpel fxn */ 3250 WORD32 i4_inp_stride; 3251 3252 /** stride of recon buffer */ 3253 WORD32 i4_rec_stride; 3254 3255 /** Indicates if bi dir is enabled or not */ 3256 WORD32 i4_bidir_enabled; 3257 3258 /** 3259 * Total number of references of current picture which is enocded 3260 */ 3261 UWORD8 u1_num_ref; 3262 3263 /** Recon Pic buffer pointers for L0 list */ 3264 recon_pic_buf_t **pps_rec_list_l0; 3265 3266 /** Recon Pic buffer pointers for L1 list */ 3267 recon_pic_buf_t **pps_rec_list_l1; 3268 3269 /** 3270 * These pointers point to modified input, one each for one ref idx. 3271 * Instead of weighting the reference, we weight the input with inverse 3272 * wt and offset for list 0 and list 1. 3273 */ 3274 UWORD8 *apu1_wt_inp[2][MAX_NUM_REF]; 3275 3276 /* Since ME uses weighted inputs, we use reciprocal of the actual weights */ 3277 /* that are signaled in the bitstream */ 3278 WORD32 *pi4_inv_wt; 3279 WORD32 *pi4_inv_wt_shift_val; 3280 3281 /* Map between L0 Reference indices and LC indices */ 3282 WORD8 *pi1_past_list; 3283 3284 /* Map between L1 Reference indices and LC indices */ 3285 WORD8 *pi1_future_list; 3286 3287 /** 3288 * Points to the non-weighted input data for the current CTB 3289 */ 3290 UWORD8 *pu1_non_wt_inp; 3291 3292 /** 3293 * Store the pred lambda and lamda_qshifts for all the reference indices 3294 */ 3295 WORD32 i4_lamda; 3296 3297 UWORD8 u1_lamda_qshift; 3298 3299 WORD32 wpred_log_wdc; 3300 3301 /** 3302 * Number of active references in l0 3303 */ 3304 UWORD8 u1_num_active_ref_l0; 3305 3306 /** 3307 * Number of active references in l1 3308 */ 3309 UWORD8 u1_num_active_ref_l1; 3310 3311 /** The max_depth for inter tu_tree */ 3312 UWORD8 u1_max_tr_depth; 3313 3314 /** Quality Preset */ 3315 WORD8 i1_quality_preset; 3316 3317 /** SATD or SAD */ 3318 UWORD8 u1_use_satd; 3319 3320 /* Frame level QP */ 3321 WORD32 i4_qstep_ls8; 3322 3323 /* Pointer to an array of PU level src variances */ 3324 UWORD32 *pu4_src_variance; 3325 3326 WORD32 i4_alpha_stim_multiplier; 3327 3328 UWORD8 u1_is_cu_noisy; 3329 3330 ULWORD64 *pu8_part_src_sigmaX; 3331 3332 ULWORD64 *pu8_part_src_sigmaXSquared; 3333 3334 UWORD8 u1_max_2nx2n_tu_recur_cands; 3335 3336 } inter_ctb_prms_t; 3337 3338 /*****************************************************************************/ 3339 /* Extern Variable Declarations */ 3340 /*****************************************************************************/ 3341 extern const double lamda_modifier_for_I_pic[8]; 3342 3343 /*****************************************************************************/ 3344 /* Extern Function Declarations */ 3345 /*****************************************************************************/ 3346 3347 #endif /* _IHEVCE_ENC_STRUCTS_H_ */ 3348