1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /*! 21 ****************************************************************************** 22 * \file hme_defs.h 23 * 24 * \brief 25 * Important definitions, enumerations, macros and structures used by ME 26 * 27 * \date 28 * 18/09/2012 29 * 30 * \author 31 * Ittiam 32 * 33 ****************************************************************************** 34 */ 35 36 #ifndef _HME_DEFS_H_ 37 #define _HME_DEFS_H_ 38 39 /*****************************************************************************/ 40 /* Constant Macros */ 41 /*****************************************************************************/ 42 /** 43 ******************************************************************************* 44 @brief Blk size of the CTB in the max possible case 45 ******************************************************************************* 46 */ 47 #define CTB_BLK_SIZE 64 48 49 /** 50 ******************************************************************************* 51 @brief Maximun number of results per partition 52 ******************************************************************************* 53 */ 54 #define MAX_RESULTS_PER_PART 2 55 56 /** 57 ******************************************************************************* 58 @brief Not used currently 59 ******************************************************************************* 60 */ 61 #define MAX_NUM_UNIFIED_RESULTS 10 62 #define MAX_NUM_CTB_NODES 10 63 64 /** 65 ******************************************************************************* 66 @brief For 64x64 CTB, we have 16x16 MV grid for prediction purposes (cost calc) 67 This has 1 padding at boundaries for causal neighbours 68 ******************************************************************************* 69 */ 70 #define CTB_MV_GRID_PAD 1 71 72 /** 73 ******************************************************************************* 74 @brief number of bits per bin 75 ******************************************************************************* 76 */ 77 #define HME_CABAC_BITS_PER_BIN 0.5 78 79 /** 80 ******************************************************************************* 81 @brief bin count to bit count conversion 82 ******************************************************************************* 83 */ 84 #define HME_GET_CAB_BIT(x) (U08(((x)*HME_CABAC_BITS_PER_BIN + 0.5))) 85 86 /** 87 ******************************************************************************* 88 @brief Columns in the MV grid 89 ******************************************************************************* 90 */ 91 #define NUM_COLUMNS_IN_CTB_GRID (((CTB_BLK_SIZE) >> 2) + (2 * CTB_MV_GRID_PAD)) 92 93 /** 94 
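 * (Worked example with the values defined above: CTB_BLK_SIZE = 64 and
 *  CTB_MV_GRID_PAD = 1 give NUM_COLUMNS_IN_CTB_GRID = (64 >> 2) + 2 * 1 = 18,
 *  so the padded MV grid is 18 x 18 = 324 entries.)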
******************************************************************************* 95 @brief Rows in MV grid 96 ******************************************************************************* 97 */ 98 #define NUM_ROWS_IN_CTB_GRID (NUM_COLUMNS_IN_CTB_GRID) 99 100 /** 101 ******************************************************************************* 102 @brief Total number of MVs held in CTB grid for prediction pourposes 103 ******************************************************************************* 104 */ 105 #define NUM_MVS_IN_CTB_GRID ((NUM_COLUMNS_IN_CTB_GRID) * (NUM_ROWS_IN_CTB_GRID)) 106 107 /** 108 ******************************************************************************* 109 @brief Max number of candidates used for refinement during CU merge stage 110 ******************************************************************************* 111 */ 112 #define MAX_MERGE_CANDTS 64 113 114 /** 115 ******************************************************************************* 116 @brief For BIDIR refinement, we use 2I-P0 as input, done max at CTB level, so 117 stride for this input is 64 118 ******************************************************************************* 119 */ 120 #define BACK_PREDICTION_INPUT_STRIDE 64 121 122 /** 123 ******************************************************************************* 124 @brief We basically store an impossible and unique MV to identify intra blks 125 or CUs 126 ******************************************************************************* 127 */ 128 #define INTRA_MV 0x4000 129 130 /** 131 ******************************************************************************* 132 @brief Defines the largest CTB supported by HME 133 ******************************************************************************* 134 */ 135 #define HME_MAX_CTB_SIZE 64 136 137 /** 138 ******************************************************************************* 139 @brief Maximum number of 16x16 blks possible in a CTB. The basic search unit 140 in the encode layer is 16x16 141 ******************************************************************************* 142 */ 143 #define HME_MAX_16x16_IN_CTB ((HME_MAX_CTB_SIZE >> 4) * (HME_MAX_CTB_SIZE >> 4)) 144 145 /** 146 ******************************************************************************* 147 @brief Max number of 8x8s possible in a CTB, this in other words is also the 148 maximum number of CUs possible in a CTB 149 ******************************************************************************* 150 */ 151 #define HME_MAX_8x8_IN_CTB ((HME_MAX_CTB_SIZE >> 3) * (HME_MAX_CTB_SIZE >> 3)) 152 153 /** 154 ******************************************************************************* 155 @brief Maximum number of init candts supported for refinement search. 
156 ******************************************************************************* 157 */ 158 #define MAX_INIT_CANDTS 60 159 160 /** 161 ******************************************************************************* 162 @brief Maximum MV in X and Y directions in fullpel units allowed in any layer 163 Any computed range for MV hasto be within this 164 ******************************************************************************* 165 */ 166 #define MAX_MV_X_FINEST 1024 167 #define MAX_MV_Y_FINEST 512 168 169 #define MAX_NUM_RESULTS 10 170 171 #define USE_MODIFIED 1 172 173 #define ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 1 174 175 #define ENABLE_EXPLICIT_SEARCH_IN_PQ 0 176 177 /** 178 ******************************************************************************* 179 @brief Driven by reasoning that we can tolerate an error of 4 in global mv 180 in coarsest layer per comp, assuming we have search range of 1024x512, the mv 181 range in coarse layer is 128x64, total bins is then 256/4 x 128/4 or 2K bins 182 ******************************************************************************* 183 */ 184 #define LOG_MAX_NUM_BINS 11 185 #define MAX_NUM_BINS (1 << LOG_MAX_NUM_BINS) 186 187 #define NEXT_BLOCK_OFFSET_IN_L0_ME 22 188 189 #define PREV_BLOCK_OFFSET_IN_L0_ME 6 190 191 #define COLOCATED_BLOCK_OFFSET 2 192 193 #define COLOCATED_4X4_NEXT_BLOCK_OFFSET 14 194 195 #define MAP_X_MAX 16 196 197 #define MAP_Y_MAX 16 198 199 #define NUM_POINTS_IN_RECTANGULAR_GRID 9 200 201 /* 202 ****************************************************************************** 203 @brief Maximum number of elements in the sigmaX and sigmaX-Square array 204 computed at 4x4 level for any CU size 205 ****************************************************************************** 206 */ 207 #define MAX_NUM_SIGMAS_4x4 256 208 209 /*****************************************************************************/ 210 /* Function Macros */ 211 /*****************************************************************************/ 212 213 /** 214 ******************************************************************************* 215 @brief Calculates number of blks in picture, given width, ht, and a variable 216 shift that controls basic blk size 217 ******************************************************************************* 218 */ 219 #define GET_NUM_BLKS_IN_PIC(wd, ht, shift, num_cols, num_blks) \ 220 { \ 221 S32 y, rnd; \ 222 rnd = (1 << shift) - 1; \ 223 num_cols = (wd + rnd) >> shift; \ 224 y = (ht + rnd) >> shift; \ 225 num_blks = num_cols * y; \ 226 } 227 228 #define COUNT_CANDS(a, b) \ 229 { \ 230 b = (((a) & (1))) + (((a >> 1) & (1))) + (((a >> 2) & (1))) + (((a >> 3) & (1))) + \ 231 (((a >> 4) & (1))) + (((a >> 5) & (1))) + (((a >> 6) & (1))) + (((a >> 7) & (1))) + \ 232 (((a >> 8) & (1))); \ 233 } 234 235 #define COPY_MV_TO_SEARCH_NODE(node, mv, pref, refid, shift) \ 236 { \ 237 (node)->s_mv.i2_mvx = (mv)->i2_mv_x; \ 238 (node)->s_mv.i2_mvy = (mv)->i2_mv_y; \ 239 (node)->i1_ref_idx = *pref; \ 240 (node)->u1_is_avail = 1; \ 241 \ 242 /* Can set the availability flag for MV Pred purposes */ \ 243 if(((node)->i1_ref_idx < 0) || ((node)->s_mv.i2_mvx == INTRA_MV)) \ 244 { \ 245 (node)->u1_is_avail = 0; \ 246 (node)->i1_ref_idx = refid; \ 247 (node)->s_mv.i2_mvx = 0; \ 248 (node)->s_mv.i2_mvy = 0; \ 249 } \ 250 (node)->s_mv.i2_mvx >>= (shift); \ 251 (node)->s_mv.i2_mvy >>= (shift); \ 252 (node)->u1_subpel_done = (shift) ? 
0 : 1; \ 253 } 254 255 #define COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance) \ 256 { \ 257 S32 mvx_q8 = (ps_mv)->mvx << 8; \ 258 S32 mvy_q8 = (ps_mv)->mvy << 8; \ 259 S32 mvcx_q8 = (ps_data)->s_centroid.i4_pos_x_q8; \ 260 S32 mvcy_q8 = (ps_data)->s_centroid.i4_pos_y_q8; \ 261 \ 262 S32 mvdx_q8 = mvx_q8 - mvcx_q8; \ 263 S32 mvdy_q8 = mvy_q8 - mvcy_q8; \ 264 \ 265 S32 mvdx = (mvdx_q8 + (1 << 7)) >> 8; \ 266 S32 mvdy = (mvdy_q8 + (1 << 7)) >> 8; \ 267 \ 268 S32 mvd = ABS(mvdx) + ABS(mvdy); \ 269 \ 270 cumulative_mv_distance += mvd; \ 271 } 272 273 #define STATS_COLLECTOR_MV_INSERT( \ 274 ps_mv_store, num_mvs_stored, mvx_cur, mvy_cur, stats_struct, check_for_duplicate, ref_idx) \ 275 { \ 276 S32 i4_j; \ 277 (stats_struct).f_num_cands_being_processed++; \ 278 check_for_duplicate = 0; \ 279 \ 280 for(i4_j = 0; i4_j < (num_mvs_stored); i4_j++) \ 281 { \ 282 if(((ps_mv_store)[i4_j].s_mv.i2_mvx == (mvx_cur)) && \ 283 ((ps_mv_store)[i4_j].s_mv.i2_mvy == (mvy_cur)) && \ 284 ((ps_mv_store)[i4_j].i1_ref_idx == ref_idx)) \ 285 { \ 286 (stats_struct).f_num_duplicates_amongst_processed++; \ 287 check_for_duplicate = 0; \ 288 break; \ 289 } \ 290 } \ 291 \ 292 if(i4_j == (num_mvs_stored)) \ 293 { \ 294 (ps_mv_store)[i4_j].s_mv.i2_mvx = (mvx_cur); \ 295 (ps_mv_store)[i4_j].s_mv.i2_mvy = (mvy_cur); \ 296 (ps_mv_store)[i4_j].i1_ref_idx = ref_idx; \ 297 (num_mvs_stored)++; \ 298 } \ 299 } 300 301 #define UPDATE_CLUSTER_METADATA_POST_MERGE(ps_cluster) \ 302 { \ 303 S32 m; \ 304 \ 305 S32 num_clusters_evaluated = 0; \ 306 \ 307 for(m = 0; num_clusters_evaluated < (ps_cluster)->num_clusters; m++) \ 308 { \ 309 if(!((ps_cluster)->as_cluster_data[m].is_valid_cluster)) \ 310 { \ 311 if(-1 != (ps_cluster)->as_cluster_data[m].ref_id) \ 312 { \ 313 (ps_cluster)->au1_num_clusters[(ps_cluster)->as_cluster_data[m].ref_id]--; \ 314 } \ 315 } \ 316 else \ 317 { \ 318 num_clusters_evaluated++; \ 319 } \ 320 } \ 321 } 322 323 #define SET_VALUES_FOR_TOP_REF_IDS(ps_cluster_blk, best_uni_ref, best_alt_ref, num_ref) \ 324 { \ 325 ps_cluster_blk->best_uni_ref = best_uni_ref; \ 326 ps_cluster_blk->best_alt_ref = best_alt_ref; \ 327 ps_cluster_blk->num_refs = num_ref; \ 328 } 329 330 #define MAP_X_MAX 16 331 #define MAP_Y_MAX 16 332 333 #define CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES( \ 334 ps_dedup_enabler, num_cands, mvx, mvy, check_for_duplicate) \ 335 { \ 336 S32 center_mvx; \ 337 S32 center_mvy; \ 338 S32 mvdx; \ 339 S32 mvdy; \ 340 U32 *pu4_node_map; \ 341 S32 columnar_presence; \ 342 \ 343 (check_for_duplicate) = 0; \ 344 { \ 345 subpel_dedup_enabler_t *ps_dedup = &(ps_dedup_enabler)[0]; \ 346 center_mvx = ps_dedup->i2_mv_x; \ 347 center_mvy = ps_dedup->i2_mv_y; \ 348 pu4_node_map = ps_dedup->au4_node_map; \ 349 \ 350 mvdx = (mvx)-center_mvx; \ 351 mvdy = (mvy)-center_mvy; \ 352 \ 353 if(((mvdx < MAP_X_MAX) && (mvdx >= -MAP_X_MAX)) && \ 354 ((mvdy < MAP_Y_MAX) && (mvdy >= -MAP_Y_MAX))) \ 355 { \ 356 columnar_presence = pu4_node_map[MAP_X_MAX + mvdx]; \ 357 \ 358 if(0 == (columnar_presence & (1U << (MAP_Y_MAX + mvdy)))) \ 359 { \ 360 columnar_presence |= (1U << (MAP_Y_MAX + mvdy)); \ 361 pu4_node_map[MAP_X_MAX + mvdx] = columnar_presence; \ 362 } \ 363 else \ 364 { \ 365 (check_for_duplicate) = 1; \ 366 } \ 367 } \ 368 } \ 369 } 370 371 #define BUMP_OUTLIER_CLUSTERS(ps_cluster_blk, sdi_threshold) \ 372 { \ 373 outlier_data_t as_outliers[MAX_NUM_CLUSTERS_64x64 + 1]; \ 374 \ 375 S32 j, k; \ 376 \ 377 S32 num_clusters_evaluated = 0; \ 378 S32 num_clusters = ps_cluster_blk->num_clusters; \ 379 S32 num_outliers_present = 0; \ 
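        /* The loops below first collect, as outlier candidates, single-MV    \
           clusters whose SDI is below sdi_threshold and whose ref idx holds  \
           more than MAX_NUM_CLUSTERS_IN_ONE_REF_IDX clusters; the outliers   \
           are then sorted by SDI and invalidated while the count stays       \
           above that limit */                                                 \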
380 \ 381 for(j = 0; num_clusters_evaluated < num_clusters; j++) \ 382 { \ 383 cluster_data_t *ps_data = &ps_cluster_blk->as_cluster_data[j]; \ 384 \ 385 if(!ps_data->is_valid_cluster) \ 386 { \ 387 continue; \ 388 } \ 389 \ 390 num_clusters_evaluated++; \ 391 \ 392 if((ps_data->num_mvs == 1) && (ps_data->as_mv[0].sdi < sdi_threshold) && \ 393 (ps_cluster_blk->au1_num_clusters[ps_data->ref_id] > \ 394 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)) \ 395 { \ 396 as_outliers[num_outliers_present].cluster_id = j; \ 397 as_outliers[num_outliers_present].ref_idx = ps_data->ref_id; \ 398 as_outliers[num_outliers_present].sdi = ps_data->as_mv[0].sdi; \ 399 num_outliers_present++; \ 400 } \ 401 } \ 402 \ 403 for(j = 0; j < (num_outliers_present - 1); j++) \ 404 { \ 405 for(k = (j + 1); k < num_outliers_present; k++) \ 406 { \ 407 if(as_outliers[j].sdi > as_outliers[k].sdi) \ 408 { \ 409 as_outliers[MAX_NUM_CLUSTERS_64x64] = as_outliers[j]; \ 410 as_outliers[j] = as_outliers[k]; \ 411 as_outliers[k] = as_outliers[MAX_NUM_CLUSTERS_64x64]; \ 412 } \ 413 } \ 414 } \ 415 \ 416 for(j = 0; j < (num_outliers_present); j++) \ 417 { \ 418 S32 ref_idx = as_outliers[j].ref_idx; \ 419 \ 420 if((ps_cluster_blk->au1_num_clusters[ref_idx] > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)) \ 421 { \ 422 ps_cluster_blk->as_cluster_data[as_outliers[j].cluster_id].is_valid_cluster = 0; \ 423 ps_cluster_blk->num_clusters--; \ 424 ps_cluster_blk->au1_num_clusters[ref_idx]--; \ 425 } \ 426 } \ 427 } 428 429 #define ADD_CLUSTER_CENTROID_AS_CANDS_FOR_BLK_MERGE( \ 430 ps_cluster_data, ps_range_prms, ps_list, ps_mv, is_ref_in_l0, ref_idx) \ 431 { \ 432 ps_list = &(ps_cluster_data)->as_mv_list[!(is_ref_in_l0)][(ref_idx)]; \ 433 ps_mv = &ps_list->as_mv[ps_list->num_mvs]; \ 434 \ 435 ps_mv->i2_mvx = (ps_centroid->i4_pos_x_q8 + (1 << 7)) >> 8; \ 436 ps_mv->i2_mvy = (ps_centroid->i4_pos_y_q8 + (1 << 7)) >> 8; \ 437 \ 438 CLIP_MV_WITHIN_RANGE(ps_mv->i2_mvx, ps_mv->i2_mvy, (ps_range_prms), 0, 0, 0); \ 439 \ 440 ps_cluster_data->ai4_ref_id_valid[!(is_ref_in_l0)][(ref_idx)] = 1; \ 441 \ 442 ps_list->num_mvs++; \ 443 } 444 445 #define COPY_SEARCH_CANDIDATE_DATA(node, mv, pref, refid, shift) \ 446 { \ 447 (node)->ps_mv->i2_mvx = (mv)->i2_mv_x; \ 448 (node)->ps_mv->i2_mvy = (mv)->i2_mv_y; \ 449 (node)->i1_ref_idx = *pref; \ 450 (node)->u1_is_avail = 1; \ 451 \ 452 /* Can set the availability flag for MV Pred purposes */ \ 453 if(((node)->i1_ref_idx < 0) || ((node)->ps_mv->i2_mvx == INTRA_MV)) \ 454 { \ 455 (node)->u1_is_avail = 0; \ 456 (node)->i1_ref_idx = refid; \ 457 (node)->ps_mv->i2_mvx = 0; \ 458 (node)->ps_mv->i2_mvy = 0; \ 459 } \ 460 (node)->ps_mv->i2_mvx >>= (shift); \ 461 (node)->ps_mv->i2_mvy >>= (shift); \ 462 (node)->u1_subpel_done = (shift) ? 0 : 1; \ 463 } 464 /** 465 ******************************************************************************* 466 * @macro MIN_NODE 467 * @brief Returns the search node with lesser cost 468 ******************************************************************************* 469 */ 470 #define MIN_NODE(a, b) (((a)->i4_tot_cost < (b)->i4_tot_cost) ? (a) : (b)) 471 472 /** 473 ******************************************************************************* 474 * @macro MAX_NODE 475 * @brief Returns search node with higher cost 476 ******************************************************************************* 477 */ 478 #define MAX_NODE(a, b) (((a)->i4_tot_cost >= (b)->i4_tot_cost) ? 
(a) : (b))

/**
******************************************************************************
 * @macro HME_INV_WT_PRED
 * @brief Implements inverse of wt pred formula. Actual wt pred formula is
 *        (((input * wt) + rnd) >> shift) + offset
******************************************************************************
*/
#define HME_INV_WT_PRED(inp, wt, off, shift) (((((inp) - (off)) << (shift)) + ((wt) >> 1)) / (wt))
#define HME_INV_WT_PRED1(inp, wt, off, shift) \
    (((((inp) - (off)) << (shift)) * wt + (1 << 14)) >> 15)

/**
******************************************************************************
 * @macro HME_WT_PRED
 * @brief Implements wt pred formula as per spec
******************************************************************************
*/
#define HME_WT_PRED(p0, p1, w0, w1, rnd, shift) \
    (((((S32)w0) * ((S32)p0) + ((S32)w1) * ((S32)p1)) >> shift) + rnd)

/**
******************************************************************************
 * @macro PREFETCH_BLK
 * @brief Prefetches a block of data into cache beforehand
******************************************************************************
*/
#define PREFETCH_BLK(pu1_src, src_stride, lines, type) \
    { \
        WORD32 ctr; \
        for(ctr = 0; ctr < lines; ctr++) \
        { \
            PREFETCH((char const *)pu1_src, type); \
            pu1_src += src_stride; \
        } \
    }

/**
******************************************************************************
 * @macro INSERT_UNIQUE_NODE
 * @brief Inserts a new search node in a list if it is unique; helps in
 *        removing duplicate nodes/candidates
******************************************************************************
*/
#define INSERT_UNIQUE_NODE( \
    as_nodes, num_nodes, new_node, au4_map, center_x, center_y, use_hashing) \
    { \
        WORD32 k; \
        UWORD32 map; \
        WORD32 delta_x, delta_y; \
        delta_x = (new_node).ps_mv->i2_mvx - (center_x); \
        delta_y = (new_node).ps_mv->i2_mvy - (center_y); \
        map = 0; \
        \
        if((use_hashing) && (delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) && \
           (delta_y < MAP_Y_MAX) && (delta_y >= (-MAP_Y_MAX))) \
        { \
            map = (au4_map)[delta_x + MAP_X_MAX]; \
            if(0 == (map & (1U << (delta_y + MAP_Y_MAX)))) \
            { \
                (new_node).s_mv = (new_node).ps_mv[0]; \
                (as_nodes)[(num_nodes)] = (new_node); \
                ((num_nodes))++; \
                map |= 1U << (delta_y + MAP_Y_MAX); \
                (au4_map)[delta_x + MAP_X_MAX] = map; \
            } \
        } \
        else \
        { \
            for(k = 0; k < ((num_nodes)); k++) \
            { \
                /* Search if this node is already present in unique list */ \
                if(((as_nodes)[k].s_mv.i2_mvx == (new_node).ps_mv->i2_mvx) && \
                   ((as_nodes)[k].s_mv.i2_mvy == (new_node).ps_mv->i2_mvy) && \
                   ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx)) \
                { \
                    /* This is a duplicate node; need not be inserted */ \
                    break; \
                } \
            } \
            if(k == ((num_nodes))) \
            { \
                /* Insert new node only if it is not a duplicate node */ \
                (new_node).s_mv = (new_node).ps_mv[0]; \
                (as_nodes)[k] = (new_node); \
                ((num_nodes))++; \
            } \
        } \
    }

/**
******************************************************************************
 * @macro INSERT_NEW_NODE_NOMAP
 * @brief Inserts a new search node in a list if it is unique; helps in
 *        removing duplicate nodes/candidates
******************************************************************************
*/
#define
INSERT_NEW_NODE_NOMAP(as_nodes, num_nodes, new_node, implicit_layer) \ 578 { \ 579 WORD32 k; \ 580 if(!implicit_layer) \ 581 { \ 582 for(k = 0; k < (num_nodes); k++) \ 583 { \ 584 /* Search is this node is already present in unique list */ \ 585 if((as_nodes[k].s_mv.i2_mvx == new_node.s_mv.i2_mvx) && \ 586 (as_nodes[k].s_mv.i2_mvy == new_node.s_mv.i2_mvy)) \ 587 { \ 588 /* This is duplicate node; need not be inserted */ \ 589 break; \ 590 } \ 591 } \ 592 } \ 593 else \ 594 { \ 595 for(k = 0; k < (num_nodes); k++) \ 596 { \ 597 /* Search is this node is already present in unique list */ \ 598 if((as_nodes[k].s_mv.i2_mvx == new_node.s_mv.i2_mvx) && \ 599 (as_nodes[k].s_mv.i2_mvy == new_node.s_mv.i2_mvy) && \ 600 (as_nodes[k].i1_ref_idx == new_node.i1_ref_idx)) \ 601 { \ 602 /* This is duplicate node; need not be inserted */ \ 603 break; \ 604 } \ 605 } \ 606 } \ 607 \ 608 if(k == (num_nodes)) \ 609 { \ 610 /* Insert new node only if it is not duplicate node */ \ 611 as_nodes[k] = new_node; \ 612 (num_nodes)++; \ 613 } \ 614 } 615 /** 616 ****************************************************************************** 617 * @macro INSERT_NEW_NODE_NOMAP_ALTERNATE 618 * @brief Inserts a new search node in a list if it is unique; helps in 619 removing duplicate nodes/candidates 620 ****************************************************************************** 621 */ 622 #define INSERT_NEW_NODE_NOMAP_ALTERNATE(as_nodes, num_nodes, new_node, result_num, part_id) \ 623 { \ 624 WORD32 k; \ 625 WORD32 part_id_1 = (new_node->i4_num_valid_parts > 8) ? new_node->ai4_part_id[part_id] \ 626 : part_id; \ 627 for(k = 0; k < (num_nodes); k++) \ 628 { \ 629 /* Search is this node is already present in unique list */ \ 630 if((as_nodes[k].s_mv.i2_mvx == new_node->i2_mv_x[result_num][part_id_1]) && \ 631 (as_nodes[k].s_mv.i2_mvy == new_node->i2_mv_y[result_num][part_id_1]) && \ 632 (as_nodes[k].i1_ref_idx == new_node->i2_ref_idx[result_num][part_id_1])) \ 633 { \ 634 /* This is duplicate node; need not be inserted */ \ 635 break; \ 636 } \ 637 } \ 638 \ 639 if(k == (num_nodes)) \ 640 { \ 641 /* Insert new node only if it is not duplicate node */ \ 642 as_nodes[k].i4_tot_cost = (WORD32)new_node->i2_tot_cost[result_num][part_id_1]; \ 643 as_nodes[k].i4_mv_cost = (WORD32)new_node->i2_mv_cost[result_num][part_id_1]; \ 644 as_nodes[k].s_mv.i2_mvx = new_node->i2_mv_x[result_num][part_id_1]; \ 645 as_nodes[k].s_mv.i2_mvy = new_node->i2_mv_y[result_num][part_id_1]; \ 646 as_nodes[k].i1_ref_idx = (WORD8)new_node->i2_ref_idx[result_num][part_id_1]; \ 647 as_nodes[k].u1_part_id = new_node->ai4_part_id[part_id]; \ 648 (num_nodes)++; \ 649 } \ 650 } 651 652 #define INSERT_NEW_NODE( \ 653 as_nodes, num_nodes, new_node, implicit_layer, au4_map, center_x, center_y, use_hashing) \ 654 { \ 655 WORD32 k; \ 656 UWORD32 map; \ 657 WORD32 delta_x, delta_y; \ 658 delta_x = (new_node).s_mv.i2_mvx - center_x; \ 659 delta_y = (new_node).s_mv.i2_mvy - center_y; \ 660 map = 0; \ 661 if((delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) && (delta_y < MAP_Y_MAX) && \ 662 (delta_y >= (-MAP_Y_MAX)) && (use_hashing)) \ 663 { \ 664 map = (au4_map)[delta_x + MAP_X_MAX]; \ 665 if(0 == (map & (1U << (delta_y + MAP_Y_MAX)))) \ 666 { \ 667 (as_nodes)[(num_nodes)] = (new_node); \ 668 (num_nodes)++; \ 669 map |= 1U << (delta_y + MAP_Y_MAX); \ 670 (au4_map)[delta_x + MAP_X_MAX] = map; \ 671 } \ 672 } \ 673 else if(!(implicit_layer)) \ 674 { \ 675 for(k = 0; k < (num_nodes); k++) \ 676 { \ 677 /* Search is this node is already present in unique list */ \ 
678 if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \ 679 ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy)) \ 680 { \ 681 /* This is duplicate node; need not be inserted */ \ 682 break; \ 683 } \ 684 } \ 685 if(k == (num_nodes)) \ 686 { \ 687 /* Insert new node only if it is not duplicate node */ \ 688 (as_nodes)[k] = (new_node); \ 689 (num_nodes)++; \ 690 } \ 691 } \ 692 else \ 693 { \ 694 for(k = 0; k < (num_nodes); k++) \ 695 { \ 696 /* Search is this node is already present in unique list */ \ 697 if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \ 698 ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy) && \ 699 ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx)) \ 700 { \ 701 /* This is duplicate node; need not be inserted */ \ 702 break; \ 703 } \ 704 } \ 705 if(k == (num_nodes)) \ 706 { \ 707 /* Insert new node only if it is not duplicate node */ \ 708 (as_nodes)[k] = (new_node); \ 709 (num_nodes)++; \ 710 } \ 711 } \ 712 } 713 714 #define COMPUTE_DIFF_MV(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh) \ 715 { \ 716 mvdx = (inp_node)->s_mv.i2_mvx << (inp_sh); \ 717 mvdy = (inp_node)->s_mv.i2_mvy << (inp_sh); \ 718 mvdx -= ((mv_p_x) << (pred_sh)); \ 719 mvdy -= ((mv_p_y) << (pred_sh)); \ 720 } 721 722 #define COMPUTE_MV_DIFFERENCE(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh) \ 723 { \ 724 mvdx = (inp_node)->ps_mv->i2_mvx << (inp_sh); \ 725 mvdy = (inp_node)->ps_mv->i2_mvy << (inp_sh); \ 726 mvdx -= ((mv_p_x) << (pred_sh)); \ 727 mvdy -= ((mv_p_y) << (pred_sh)); \ 728 } 729 730 /** 731 ****************************************************************************** 732 * @enum CU_MERGE_RESULT_T 733 * @brief Describes the results of merge, whether successful or not 734 ****************************************************************************** 735 */ 736 typedef enum 737 { 738 CU_MERGED, 739 CU_SPLIT 740 } CU_MERGE_RESULT_T; 741 742 /** 743 ****************************************************************************** 744 * @enum PART_ORIENT_T 745 * @brief Describes the orientation of partition (vert/horz, left/rt) 746 ****************************************************************************** 747 */ 748 typedef enum 749 { 750 VERT_LEFT, 751 VERT_RIGHT, 752 HORZ_TOP, 753 HORZ_BOT 754 } PART_ORIENT_T; 755 756 /** 757 ****************************************************************************** 758 * @enum GRID_PT_T 759 * @brief For a 3x3 rect grid, nubers each pt as shown 760 * 5 2 6 761 * 1 0 3 762 * 7 4 8 763 ****************************************************************************** 764 */ 765 typedef enum 766 { 767 PT_C = 0, 768 PT_L = 1, 769 PT_T = 2, 770 PT_R = 3, 771 PT_B = 4, 772 PT_TL = 5, 773 PT_TR = 6, 774 PT_BL = 7, 775 PT_BR = 8, 776 NUM_GRID_PTS 777 } GRID_PT_T; 778 779 /** 780 ****************************************************************************** 781 * @macro IS_POW 782 * @brief Returns whwehter a number is power of 2 783 ****************************************************************************** 784 */ 785 #define IS_POW_2(x) (!((x) & ((x)-1))) 786 787 /** 788 ****************************************************************************** 789 * @macro GRID_ALL_PTS_VALID 790 * @brief For a 3x3 rect grid, this can be used to enable all pts in grid 791 ****************************************************************************** 792 */ 793 #define GRID_ALL_PTS_VALID 0x1ff 794 795 /** 796 ****************************************************************************** 797 * @macro GRID_DIAMOND_ENABLE_ALL 798 * 
@brief If we search diamond, this enables all 5 pts of diamond (including centre) 799 ****************************************************************************** 800 */ 801 #define GRID_DIAMOND_ENABLE_ALL \ 802 (BIT_EN(PT_C) | BIT_EN(PT_L) | BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B)) 803 804 /** 805 ****************************************************************************** 806 * @macro GRID_RT_3_INVALID, GRID_LT_3_INVALID,GRID_TOP_3_INVALID,GRID_BOT_3_INVALID 807 * @brief For a square grid search, depending on where the best result is 808 * we can optimise search for next iteration by invalidating some pts 809 ****************************************************************************** 810 */ 811 #define GRID_RT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR))) 812 #define GRID_LT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL))) 813 #define GRID_TOP_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR))) 814 #define GRID_BOT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR))) 815 816 /** 817 ****************************************************************************** 818 * @enum GMV_MVTYPE_T 819 * @brief Defines what type of GMV we need (thin lobe for a very spiky 820 * distribution of mv or thick lobe for a blurred distrib of mvs 821 ****************************************************************************** 822 */ 823 typedef enum 824 { 825 GMV_THICK_LOBE, 826 GMV_THIN_LOBE, 827 NUM_GMV_LOBES 828 } GMV_MVTYPE_T; 829 830 /** 831 ****************************************************************************** 832 * @enum BLK_TYPE_T 833 * @brief Defines all possible inter blks possible 834 ****************************************************************************** 835 */ 836 typedef enum 837 { 838 BLK_INVALID = -1, 839 BLK_4x4 = 0, 840 BLK_4x8, 841 BLK_8x4, 842 BLK_8x8, 843 BLK_4x16, 844 BLK_8x16, 845 BLK_12x16, 846 BLK_16x4, 847 BLK_16x8, 848 BLK_16x12, 849 BLK_16x16, 850 BLK_8x32, 851 BLK_16x32, 852 BLK_24x32, 853 BLK_32x8, 854 BLK_32x16, 855 BLK_32x24, 856 BLK_32x32, 857 BLK_16x64, 858 BLK_32x64, 859 BLK_48x64, 860 BLK_64x16, 861 BLK_64x32, 862 BLK_64x48, 863 BLK_64x64, 864 NUM_BLK_SIZES 865 } BLK_SIZE_T; 866 867 /** 868 ****************************************************************************** 869 * @enum SEARCH_COMPLEXITY_T 870 * @brief For refinement layer, this decides the number of refinement candts 871 ****************************************************************************** 872 */ 873 typedef enum 874 { 875 SEARCH_CX_LOW = 0, 876 SEARCH_CX_MED = 1, 877 SEARCH_CX_HIGH = 2 878 } SEARCH_COMPLEXITY_T; 879 880 /** 881 ****************************************************************************** 882 * @enum CTB_BOUNDARY_TYPES_T 883 * @brief For pictures not a multiples of CTB horizontally or vertically, we 884 * define 4 unique cases, centre (full ctbs), bottom boundary (64x8k CTBs), 885 * right boundary (8mx64 CTBs), and bottom rt corner (8mx8k CTB) 886 ****************************************************************************** 887 */ 888 typedef enum 889 { 890 CTB_CENTRE, 891 CTB_BOT_PIC_BOUNDARY, 892 CTB_RT_PIC_BOUNDARY, 893 CTB_BOT_RT_PIC_BOUNDARY, 894 NUM_CTB_BOUNDARY_TYPES, 895 } CTB_BOUNDARY_TYPES_T; 896 897 /** 898 ****************************************************************************** 899 * @enum SEARCH_CANDIDATE_TYPE_T 900 * @brief Monikers for all sorts of search candidates used in ME 901 
****************************************************************************** 902 */ 903 typedef enum 904 { 905 ILLUSORY_CANDIDATE = -1, 906 ZERO_MV = 0, 907 ZERO_MV_ALTREF, 908 SPATIAL_LEFT0, 909 SPATIAL_TOP0, 910 SPATIAL_TOP_RIGHT0, 911 SPATIAL_TOP_LEFT0, 912 SPATIAL_LEFT1, 913 SPATIAL_TOP1, 914 SPATIAL_TOP_RIGHT1, 915 SPATIAL_TOP_LEFT1, 916 PROJECTED_COLOC0, 917 PROJECTED_COLOC1, 918 PROJECTED_COLOC2, 919 PROJECTED_COLOC3, 920 PROJECTED_COLOC4, 921 PROJECTED_COLOC5, 922 PROJECTED_COLOC6, 923 PROJECTED_COLOC7, 924 PROJECTED_COLOC_TR0, 925 PROJECTED_COLOC_TR1, 926 PROJECTED_COLOC_BL0, 927 PROJECTED_COLOC_BL1, 928 PROJECTED_COLOC_BR0, 929 PROJECTED_COLOC_BR1, 930 PROJECTED_TOP0, 931 PROJECTED_TOP1, 932 PROJECTED_TOP_RIGHT0, 933 PROJECTED_TOP_RIGHT1, 934 PROJECTED_TOP_LEFT0, 935 PROJECTED_TOP_LEFT1, 936 PROJECTED_RIGHT0, 937 PROJECTED_RIGHT1, 938 PROJECTED_BOTTOM0, 939 PROJECTED_BOTTOM1, 940 PROJECTED_BOTTOM_RIGHT0, 941 PROJECTED_BOTTOM_RIGHT1, 942 PROJECTED_BOTTOM_LEFT0, 943 PROJECTED_BOTTOM_LEFT1, 944 COLOCATED_GLOBAL_MV0, 945 COLOCATED_GLOBAL_MV1, 946 PROJECTED_TOP2, 947 PROJECTED_TOP3, 948 PROJECTED_TOP_RIGHT2, 949 PROJECTED_TOP_RIGHT3, 950 PROJECTED_TOP_LEFT2, 951 PROJECTED_TOP_LEFT3, 952 PROJECTED_RIGHT2, 953 PROJECTED_RIGHT3, 954 PROJECTED_BOTTOM2, 955 PROJECTED_BOTTOM3, 956 PROJECTED_BOTTOM_RIGHT2, 957 PROJECTED_BOTTOM_RIGHT3, 958 PROJECTED_BOTTOM_LEFT2, 959 PROJECTED_BOTTOM_LEFT3, 960 NUM_SEARCH_CAND_TYPES 961 } SEARCH_CANDIDATE_TYPE_T; 962 963 typedef enum 964 { 965 ILLUSORY_LOCATION = -1, 966 COLOCATED, 967 COLOCATED_4x4_TR, 968 COLOCATED_4x4_BL, 969 COLOCATED_4x4_BR, 970 LEFT, 971 TOPLEFT, 972 TOP, 973 TOPRIGHT, 974 RIGHT, 975 BOTTOMRIGHT, 976 BOTTOM, 977 BOTTOMLEFT, 978 NUM_SEARCH_CAND_LOCATIONS 979 } SEARCH_CAND_LOCATIONS_T; 980 981 /** 982 ****************************************************************************** 983 * @macros ENABLE_mxn 984 * @brief Enables a type or a group of partitions. ENABLE_ALL_PARTS, enables all 985 * partitions, while others enable selected partitions. These can be used 986 * to set the mask of active partitions 987 ****************************************************************************** 988 */ 989 #define ENABLE_2Nx2N (BIT_EN(PART_ID_2Nx2N)) 990 #define ENABLE_2NxN (BIT_EN(PART_ID_2NxN_T) | BIT_EN(PART_ID_2NxN_B)) 991 #define ENABLE_Nx2N (BIT_EN(PART_ID_Nx2N_L) | BIT_EN(PART_ID_Nx2N_R)) 992 #define ENABLE_NxN \ 993 (BIT_EN(PART_ID_NxN_TL) | BIT_EN(PART_ID_NxN_TR) | BIT_EN(PART_ID_NxN_BL) | \ 994 BIT_EN(PART_ID_NxN_BR)) 995 #define ENABLE_2NxnU (BIT_EN(PART_ID_2NxnU_T) | BIT_EN(PART_ID_2NxnU_B)) 996 #define ENABLE_2NxnD (BIT_EN(PART_ID_2NxnD_T) | BIT_EN(PART_ID_2NxnD_B)) 997 #define ENABLE_nLx2N (BIT_EN(PART_ID_nLx2N_L) | BIT_EN(PART_ID_nLx2N_R)) 998 #define ENABLE_nRx2N (BIT_EN(PART_ID_nRx2N_L) | BIT_EN(PART_ID_nRx2N_R)) 999 #define ENABLE_AMP ((ENABLE_2NxnU) | (ENABLE_2NxnD) | (ENABLE_nLx2N) | (ENABLE_nRx2N)) 1000 #define ENABLE_SMP ((ENABLE_2NxN) | (ENABLE_Nx2N)) 1001 #define ENABLE_ALL_PARTS \ 1002 ((ENABLE_2Nx2N) | (ENABLE_NxN) | (ENABLE_2NxN) | (ENABLE_Nx2N) | (ENABLE_AMP)) 1003 #define ENABLE_SQUARE_PARTS ((ENABLE_2Nx2N) | (ENABLE_NxN)) 1004 1005 /** 1006 ****************************************************************************** 1007 * @enum MV_PEL_RES_T 1008 * @brief Resolution of MV fpel/hpel/qpel units. Useful for maintaining 1009 * predictors. 
During fpel search, candts, predictors etc are in fpel units, 1010 * in subpel search, they are in subpel units 1011 ****************************************************************************** 1012 */ 1013 typedef enum 1014 { 1015 MV_RES_FPEL, 1016 MV_RES_HPEL, 1017 MV_RES_QPEL 1018 } MV_PEL_RES_T; 1019 1020 /** 1021 ****************************************************************************** 1022 * @enum HME_SET_MVPRED_RES 1023 * @brief Sets resolution for predictor bank (fpel/qpel/hpel units) 1024 ****************************************************************************** 1025 */ 1026 #define HME_SET_MVPRED_RES(ps_pred_ctxt, mv_pel_res) ((ps_pred_ctxt)->mv_pel = mv_pel_res) 1027 1028 /** 1029 ****************************************************************************** 1030 * @enum HME_SET_MVPRED_DIR 1031 * @brief Sets the direction, meaning L0/L1. Since L0 and L1 use separate 1032 * candts, the pred ctxt for them hasto be maintained separately 1033 ****************************************************************************** 1034 */ 1035 #define HME_SET_MVPRED_DIR(ps_pred_ctxt, pred_lx) ((ps_pred_ctxt)->pred_lx = pred_lx) 1036 1037 /** 1038 ****************************************************************************** 1039 * @brief macros to clip / check mv within specified range 1040 ****************************************************************************** 1041 */ 1042 #define CHECK_MV_WITHIN_RANGE(x, y, range) \ 1043 (((x) > (range)->i2_min_x) && ((x) < (range)->i2_max_x) && ((y) > (range)->i2_min_y) && \ 1044 ((y) < (range)->i2_max_y)) 1045 1046 #define CONVERT_MV_LIMIT_TO_QPEL(range) \ 1047 { \ 1048 (range)->i2_max_x <<= 2; \ 1049 (range)->i2_max_y <<= 2; \ 1050 (range)->i2_min_x <<= 2; \ 1051 (range)->i2_min_y <<= 2; \ 1052 } 1053 1054 #define CONVERT_MV_LIMIT_TO_FPEL(range) \ 1055 { \ 1056 (range)->i2_max_x >>= 2; \ 1057 (range)->i2_max_y >>= 2; \ 1058 (range)->i2_min_x >>= 2; \ 1059 (range)->i2_min_y >>= 2; \ 1060 } 1061 1062 /** 1063 ****************************************************************************** 1064 * @brief Swicth to debug the number of subpel search nodes 1065 ****************************************************************************** 1066 */ 1067 #define DEBUG_SUBPEL_SEARCH_NODE_HS_COUNT 0 1068 1069 /** 1070 ****************************************************************************** 1071 * @typedef SAD_GRID_T 1072 * @brief Defines a 2D array type used to store SADs across grid and across 1073 * partition types 1074 ****************************************************************************** 1075 */ 1076 typedef S32 SAD_GRID_T[9][MAX_NUM_PARTS]; 1077 1078 /*****************************************************************************/ 1079 /* Structures */ 1080 /*****************************************************************************/ 1081 1082 /** 1083 ****************************************************************************** 1084 * @struct grid_node_t 1085 * @brief stores a complete info for a candt 1086 ****************************************************************************** 1087 */ 1088 typedef struct 1089 { 1090 S16 i2_mv_x; 1091 S16 i2_mv_y; 1092 S08 i1_ref_idx; 1093 } grid_node_t; 1094 1095 /** 1096 ****************************************************************************** 1097 * @struct search_node_t 1098 * @brief Basic structure used for storage of search results, specification 1099 * of init candidates for search etc. 
This structure is complete for
 * specification of mv and cost for a given direction of search (L0/L1) but
 * does not carry information of what type of partition it represents.
******************************************************************************
*/
typedef struct
{
    /** Motion vector */
    mv_t s_mv;

    /** Used in the hme_mv_clipper function to reduce loads and stores */
    mv_t *ps_mv;

    /** Ref id, as specified in terms of Lc, unified list */
    S08 i1_ref_idx;

    /** Flag to indicate whether mv is in fpel or QPEL units */
    U08 u1_subpel_done;

    /**
     * Indicates whether this node constitutes a valid predictor candt.
     * Since this structure is also used for predictor candts, some candts may
     * not be available (anti causal or outside pic boundary). Availability
     * can be inferred using this flag.
     */
    U08 u1_is_avail;

    /**
     * Indicates partition Id to which this node belongs. Useful during
     * subpel / fullpel refinement search to identify partition whose
     * cost needs to be minimized
     */
    U08 u1_part_id;

    /** SAD / SATD stored here */
    S32 i4_sad;

    /**
     * Cost related to coding MV, multiplied by lambda
     * TODO : Entry may be redundant, can be removed
     */
    S32 i4_mv_cost;

    /** Total cost, (SAD + MV Cost) */
    S32 i4_tot_cost;

    /** Subpel_Dist_Improvement.
        It is the reduction in distortion (SAD or SATD) achieved
        from the full-pel stage to the sub-pel stage
     */
    S32 i4_sdi;

} search_node_t;

/**
******************************************************************************
 * @macro INIT_SEARCH_NODE
 * @brief Initializes this search_node_t structure. Can be used to zero
 *         out candts, set max costs in results etc
******************************************************************************
*/
#define INIT_SEARCH_NODE(x, a) \
    { \
        (x)->s_mv.i2_mvx = 0; \
        (x)->s_mv.i2_mvy = 0; \
        (x)->i1_ref_idx = a; \
        (x)->i4_tot_cost = MAX_32BIT_VAL; \
        (x)->i4_sad = MAX_32BIT_VAL; \
        (x)->u1_subpel_done = 0; \
        (x)->u1_is_avail = 1; \
    }

/**
******************************************************************************
 * @struct part_attr_t
 * @brief Geometric description of a partition w.r.t. CU start. Note that
 *         since this is used across various CU sizes, the inference of
 *         these members is to be done in the context of specific usage
******************************************************************************
*/
typedef struct
{
    /** Start of partition w.r.t. CU start in x dirn */
    U08 u1_x_start;
    /** Size of partition w.r.t. CU start in x dirn */
    U08 u1_x_count;
    /** Start of partition w.r.t. CU start in y dirn */
    U08 u1_y_start;
    /** Size of partition w.r.t. CU start in y dirn */
    U08 u1_y_count;
} part_attr_t;

/**
******************************************************************************
 * @struct search_candt_t
 * @brief Complete information for a given candt in any refinement search
******************************************************************************
*/
typedef struct
{
    /** Points to the mv, ref id info.
*/ 1200 search_node_t *ps_search_node; 1201 /** Number of refinemnts to be done for this candt */ 1202 U08 u1_num_steps_refine; 1203 } search_candt_t; 1204 1205 /** 1206 ****************************************************************************** 1207 * @struct result_node_t 1208 * @brief Contains complete search result for a CU for a given type of 1209 * partition split. Holds ptrs to results for each partition, with 1210 * information of partition type. 1211 ****************************************************************************** 1212 */ 1213 typedef struct 1214 { 1215 /** 1216 * Type of partition that the CU is split into, for which this 1217 * result is relevant 1218 */ 1219 PART_TYPE_T e_part_type; 1220 1221 /** 1222 * Total cost of coding the CU (sum of costs of individual partitions 1223 * plus other possible CU level overheads) 1224 */ 1225 S32 i4_tot_cost; 1226 1227 /** 1228 * Pointer to results of each individual partitions. Note that max 1229 * number of partitions a CU can be split into is MAX_NUM_PARTS 1230 */ 1231 search_node_t *ps_part_result[MAX_NUM_PARTS]; 1232 1233 /* TU split flag : tu_split_flag[0] represents the transform splits 1234 * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds 1235 * to respective 32x32 */ 1236 S32 ai4_tu_split_flag[4]; 1237 1238 } result_node_t; 1239 1240 /** 1241 ****************************************************************************** 1242 * @struct ctb_node_t 1243 * @brief Finalized information for a given CU or CTB. This is a recursive 1244 * structure and can hence start at CTB level, recursing for every 1245 * level of split till we hit leaf CUs in the CTB. At leaf node 1246 * it contains info for coded non split CU, with child nodes being 1247 * set to NULL 1248 ****************************************************************************** 1249 */ 1250 typedef struct ctb_node_t 1251 { 1252 /** x offset of this CU w.r.t. CTB start (0-63) */ 1253 U08 u1_x_off; 1254 /** y offset of this C U w.r.t. CTB start (0-63) */ 1255 U08 u1_y_off; 1256 /** Results of each partition in both directions L0,L1 */ 1257 search_node_t as_part_results[MAX_NUM_PARTS][2]; 1258 /** 1259 * Pointers to pred buffers. Note that the buffer may be allocated 1260 * at parent level or at this level 1261 */ 1262 U08 *apu1_pred[2]; 1263 /** Prediction direction for each partition: 0-L0, 1-L1, 2-BI */ 1264 U08 u1_pred_dir[MAX_NUM_PARTS]; 1265 /** 1266 * When pred direction is decided to be BI, we still store the best 1267 * uni pred dir (L0/L1) in this array, for RD Opt purposes 1268 */ 1269 U08 u1_best_uni_dir[MAX_NUM_PARTS]; 1270 /** Stride of pred buffer pointed to by apu1_pred member */ 1271 S32 i4_pred_stride; 1272 /** Size of the CU that this node represents */ 1273 CU_SIZE_T e_cu_size; 1274 /** For leaf CUs, this indicats type of partition (for e.g. PRT_2NxN) */ 1275 PART_TYPE_T e_part_type; 1276 /** Below entries are for a CU level*/ 1277 S32 i4_sad; 1278 S32 i4_satd; 1279 S32 i4_mv_cost; 1280 S32 i4_rate; 1281 S32 i4_dist; 1282 S32 i4_tot_cost; 1283 /** Best costs of each partitions, if partition is BI, then best cost across uni/bi */ 1284 S32 ai4_part_costs[4]; 1285 1286 /* TU split flag : tu_split_flag[0] represents the transform splits 1287 * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds 1288 * to respective 32x32 */ 1289 /* For a 8x8 TU - 1 bit used to indicate split */ 1290 /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 
4 other bits used to indicate split in each 8x8 quadrant */
    /* For a 32x32 TU - See above */
    S32 ai4_tu_split_flag[4];

    /**
     * Pointers to child nodes. If this node is split, then the below point
     * to children nodes (TL, TR, BL, BR) each of quarter size (w/2, h/2).
     * If this node is not split, then the below point to null
     */
    struct ctb_node_t *ps_tl;
    struct ctb_node_t *ps_tr;
    struct ctb_node_t *ps_bl;
    struct ctb_node_t *ps_br;
} ctb_node_t;

/**
******************************************************************************
 * @struct ctb_mem_mgr_t
 * @brief Memory manager structure for CTB level memory allocations of CTB
 *         nodes
******************************************************************************
*/
typedef struct
{
    /** Base memory ptr */
    U08 *pu1_mem;
    /** Amount used so far (running value) */
    S32 i4_used;
    /** Total memory available for this mem mgr */
    S32 i4_tot;

    /** Size of CTB node, and alignment requirements */
    S32 i4_size;
    S32 i4_align;
} ctb_mem_mgr_t;

/**
******************************************************************************
 * @struct buf_mgr_t
 * @brief Memory manager structure for CTB level buffer allocations on the
 *        fly, esp useful for pred bufs and working memory
******************************************************************************
*/
typedef struct
{
    /** base memory ptr */
    U08 *pu1_wkg_mem;
    /** total memory available */
    S32 i4_total;
    /** Memory used so far */
    S32 i4_used;
} buf_mgr_t;

/**
******************************************************************************
 * @struct pred_candt_nodes_t
 * @brief For a given partition and a given CU/blk, this has pointers to
 *        all the neighbouring and coloc pred candts. All the pred candts
 *        are stored as search_node_t structures themselves.
******************************************************************************
*/
typedef struct
{
    search_node_t *ps_tl;
    search_node_t *ps_t;
    search_node_t *ps_tr;
    search_node_t *ps_bl;
    search_node_t *ps_l;
    search_node_t *ps_coloc;
    search_node_t *ps_zeromv;
    search_node_t **pps_proj_coloc;

    search_node_t *ps_mvp_node;
} pred_candt_nodes_t;

/**
******************************************************************************
 * @struct pred_ctxt_t
 * @brief For a given CU/blk, has complete prediction information for all
 *        types of partitions. Note that the pred candts are only pointed
 *        to, not actually stored here. This indirection is to avoid
 *        copies after each partition search; this way, the result of
 *        a partition is updated and the causally next partition
 *        automatically uses this result
******************************************************************************
*/
typedef struct
{
    pred_candt_nodes_t as_pred_nodes[TOT_NUM_PARTS];

    /**
     * We use S + lambda * R to evaluate cost. Here S = SAD/SATD and lambda
     * is the scaling of bits to S and R is bits of overhead (MV + mode).
     * Choice of lambda depends on open loop / closed loop, Qp, temporal id
     * and possibly CU depth. It is the caller's responsibility to pass
     * to this module the appropriate lambda.
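     *
     * Illustrative arithmetic only (values assumed, not from the spec): if
     * lambda is held in Q8, i.e. lambda_q_shift = 8, a lambda of 2.0 is
     * stored as 512. A candidate whose MV + mode overhead is R = 6 bits
     * with S = SAD = 900 then costs
     * 900 + ((6 * 512) >> 8) = 900 + 12 = 912.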
1386 */ 1387 S32 lambda; 1388 1389 /** lambda is in Q format, so this is the downshift reqd */ 1390 S32 lambda_q_shift; 1391 1392 /** Prediction direction : PRED_L0 or PRED_L1 */ 1393 S32 pred_lx; 1394 1395 /** MV resolution: FPEL, HPEL or QPEL */ 1396 S32 mv_pel; 1397 1398 /** Points to the ref bits lookup 1 ptr for each PRED_Lx */ 1399 U08 **ppu1_ref_bits_tlu; 1400 1401 /** 1402 * Points to the ref scale factor, for a given ref id k, 1403 * to scale as per ref id m, we use entry k+MAX_NUM_REF*m 1404 */ 1405 S16 *pi2_ref_scf; 1406 1407 /** 1408 * Flag that indicates whether T, TR and TL candidates used 1409 * are causal or projected 1410 */ 1411 U08 proj_used; 1412 1413 } pred_ctxt_t; 1414 1415 /** 1416 ****************************************************************************** 1417 * @struct search_results_t 1418 * @brief For a given CU/blk, Stores all the results of ME search. Results 1419 * are stored per partition, also the best results for CU are stored 1420 * across partitions. 1421 ****************************************************************************** 1422 */ 1423 typedef struct 1424 { 1425 /** Size of CU for which this structure used */ 1426 CU_SIZE_T e_cu_size; 1427 1428 /** 1429 * X and y offsets w.r.t. CTB start in encode layers. For non encode 1430 * layers, these may typically be 0 1431 */ 1432 U08 u1_x_off; 1433 U08 u1_y_off; 1434 1435 /** Number of best results for this CU stored */ 1436 U08 u1_num_best_results; 1437 1438 /** Number of results stored per partition. */ 1439 U08 u1_num_results_per_part; 1440 1441 /** 1442 * Number of result planes active. This may be different from total 1443 * number of active references during search. For example, we may 1444 * have 4 active ref, 2 ineach dirn, but active result planes may 1445 * only be 2, one for L0 and 1 for L1 1446 */ 1447 U08 u1_num_active_ref; 1448 /** 1449 * mask of active partitions, Totally 17 bits. For a given partition 1450 * id, as per PART_ID_T enum the corresponding bit position is 1/0 1451 * indicating that partition is active or inactive 1452 */ 1453 S32 i4_part_mask; 1454 1455 /** Points to partial results for each partition id 1456 * Temporary hack for the bug: If +1 is not kept, 1457 * it doesn't bit match with older version 1458 */ 1459 search_node_t *aps_part_results[MAX_NUM_REF][TOT_NUM_PARTS]; 1460 1461 /** 1462 * Ptr to best results for the current CU post bi pred evaluation and 1463 * intra mode insertions 1464 */ 1465 inter_cu_results_t *ps_cu_results; 1466 1467 /** 2 pred ctxts, one for L0 and one for L1 */ 1468 pred_ctxt_t as_pred_ctxt[2]; 1469 1470 /** 1471 * Pointer to a table that indicates whether the ref id 1472 * corresponds to past or future dirn. Input is ref id Lc form 1473 */ 1474 1475 U08 *pu1_is_past; 1476 1477 /** 1478 * Overall best CU cost, while other entries store CU costs 1479 * in single direction, this is best CU cost, where each 1480 * partition cost is evaluated as best of uni/bi 1481 */ 1482 S32 best_cu_cost; 1483 1484 /** 1485 * Split_flag which is used for deciding if 16x16 CU is split or not 1486 */ 1487 U08 u1_split_flag; 1488 } search_results_t; 1489 1490 /** 1491 ****************************************************************************** 1492 * @struct ctb_list_t 1493 * @brief Tree structure containing info for entire CTB. At top level 1494 * it points to entire CTB results, with children nodes at each lvl 1495 * being non null if split. 
1496 ****************************************************************************** 1497 */ 1498 typedef struct ctb_list_t 1499 { 1500 /** Indicates whether this level split further */ 1501 U08 u1_is_split; 1502 1503 /** Number of result candts present */ 1504 U08 u1_num_candts; 1505 1506 /** 1507 * Whether this level valid. E.g. if we are at boundary, where only 1508 * left 2 32x32 are within pic boundary, then the parent is force split 1509 * at the children level, TR and BR are invalid. 1510 */ 1511 U08 u1_is_valid; 1512 1513 /** 1514 * IF this level is 16x16 then this mask indicates which 8x8 blks 1515 * are valid 1516 */ 1517 U08 u1_8x8_mask; 1518 1519 /** Search results of this CU */ 1520 search_results_t *ps_search_results; 1521 1522 /** Search results of this CU */ 1523 inter_cu_results_t *ps_cu_results; 1524 1525 /** Pointers to leaf nodes, if CU is split further, else null */ 1526 struct ctb_list_t *ps_tl; 1527 struct ctb_list_t *ps_tr; 1528 struct ctb_list_t *ps_bl; 1529 struct ctb_list_t *ps_br; 1530 } ctb_list_t; 1531 1532 /** 1533 ****************************************************************************** 1534 * @struct layer_mv_t 1535 * @brief mv bank structure for a particular layer 1536 ****************************************************************************** 1537 */ 1538 typedef struct 1539 { 1540 /** Number of mvs for a given ref/pred dirn */ 1541 S32 i4_num_mvs_per_ref; 1542 /** Number of reference for which results stored */ 1543 S32 i4_num_ref; 1544 /** Number of mvs stored per blk. Product of above two */ 1545 S32 i4_num_mvs_per_blk; 1546 /** Block size of the unit for which MVs stored */ 1547 BLK_SIZE_T e_blk_size; 1548 /** Number of blocks present per row */ 1549 S32 i4_num_blks_per_row; 1550 1551 /** Number of mvs stored every row */ 1552 S32 i4_num_mvs_per_row; 1553 1554 /** 1555 * Max number of mvs allowed per row. 
The main purpose of this variable 1556 * is to resolve or detect discrepanceis between allocation time mem 1557 * and run time mem, when alloc time resolution and run time resolution 1558 * may be different 1559 */ 1560 S32 max_num_mvs_per_row; 1561 1562 /** 1563 * Pointer to mvs of 0, 0 blk, This is different from base since the 1564 * mv bank is padded all sides 1565 */ 1566 hme_mv_t *ps_mv; 1567 1568 /** Pointer to base of mv bank mvs */ 1569 hme_mv_t *ps_mv_base; 1570 1571 /** Pointers to ref idx.One to one correspondence between this and ps_mv*/ 1572 S08 *pi1_ref_idx; 1573 /** Base of ref ids just like in case of ps_mv */ 1574 S08 *pi1_ref_idx_base; 1575 1576 /** Part mask for every blk, if stored, 1 per blk */ 1577 U08 *pu1_part_mask; 1578 } layer_mv_t; 1579 1580 /** 1581 ****************************************************************************** 1582 * @struct mv_hist_t 1583 * @brief Histogram structure to calculate global mvs 1584 ****************************************************************************** 1585 */ 1586 typedef struct 1587 { 1588 S32 i4_num_rows; 1589 S32 i4_num_cols; 1590 S32 i4_shift_x; 1591 S32 i4_shift_y; 1592 S32 i4_lobe1_size; 1593 S32 i4_lobe2_size; 1594 S32 i4_min_x; 1595 S32 i4_min_y; 1596 S32 i4_num_bins; 1597 S32 ai4_bin_count[MAX_NUM_BINS]; 1598 } mv_hist_t; 1599 1600 typedef struct 1601 { 1602 U08 u1_is_past; 1603 } ref_attr_t; 1604 1605 /** 1606 ****************************************************************************** 1607 * @struct layer_ctxt_t 1608 * @brief Complete information for the layer 1609 ****************************************************************************** 1610 */ 1611 typedef struct 1612 { 1613 /** Display Width of this layer */ 1614 S32 i4_disp_wd; 1615 /** Display height of this layer */ 1616 S32 i4_disp_ht; 1617 /** Width of this layer */ 1618 S32 i4_wd; 1619 /** height of this layer */ 1620 S32 i4_ht; 1621 /** Amount of padding of input in x dirn */ 1622 S32 i4_pad_x_inp; 1623 /** Amount of padding of input in y dirn */ 1624 S32 i4_pad_y_inp; 1625 /** Padding amount of recon in x dirn */ 1626 S32 i4_pad_x_rec; 1627 /** padding amt of recon in y dirn */ 1628 S32 i4_pad_y_rec; 1629 1630 /** 1631 * Offset for recon. Since recon has padding, the 0, 0 start differs 1632 * from base of buffer 1633 */ 1634 S32 i4_rec_offset; 1635 /** Offset for input, same explanation as recon */ 1636 S32 i4_inp_offset; 1637 /** stride of input buffer */ 1638 S32 i4_inp_stride; 1639 /** stride of recon buffer */ 1640 S32 i4_rec_stride; 1641 /** Pic order count */ 1642 S32 i4_poc; 1643 /** input pointer. */ 1644 U08 *pu1_inp; 1645 /** Base of input. 
Add inp_offset to go to 0, 0 locn */ 1646 U08 *pu1_inp_base; 1647 1648 /** Pointer to 4 hpel recon planes */ 1649 U08 *pu1_rec_fxfy; 1650 U08 *pu1_rec_hxfy; 1651 U08 *pu1_rec_fxhy; 1652 U08 *pu1_rec_hxhy; 1653 1654 /** Global mv, one set per reference searched */ 1655 hme_mv_t s_global_mv[MAX_NUM_REF][NUM_GMV_LOBES]; 1656 1657 /** Layer MV bank */ 1658 layer_mv_t *ps_layer_mvbank; 1659 1660 /** Pointer to list of recon buffers for each ref id, one ptr per plane */ 1661 U08 **ppu1_list_rec_fxfy; 1662 U08 **ppu1_list_rec_hxfy; 1663 U08 **ppu1_list_rec_fxhy; 1664 U08 **ppu1_list_rec_hxhy; 1665 1666 void **ppv_dep_mngr_recon; 1667 1668 /** Pointer to list of input buffers for each ref id, one ptr per plane */ 1669 U08 **ppu1_list_inp; 1670 1671 /** Max MV in x and y direction supported at this layer resolution */ 1672 S16 i2_max_mv_x; 1673 S16 i2_max_mv_y; 1674 1675 /** Converts ref id (as per Lc list) to POC */ 1676 S32 ai4_ref_id_to_poc_lc[MAX_NUM_REF]; 1677 1678 S32 ai4_ref_id_to_disp_num[MAX_NUM_REF]; 1679 1680 /** status of the buffer */ 1681 S32 i4_is_free; 1682 1683 /** idr gop number */ 1684 S32 i4_idr_gop_num; 1685 1686 /** is reference picture */ 1687 S32 i4_is_reference; 1688 1689 /** is non reference picture processed by me*/ 1690 S32 i4_non_ref_free; 1691 1692 } layer_ctxt_t; 1693 1694 typedef S32 (*PF_MV_COST_FXN)(search_node_t *, pred_ctxt_t *, PART_ID_T, S32); 1695 1696 /** 1697 ****************************************************************************** 1698 * @struct refine_prms_t 1699 * @brief All the configurable input parameters for the refinement layer 1700 * 1701 * @param encode: Whether this layer is encoded or not 1702 * @param explicit_ref: If enabled, then the number of reference frames to 1703 * be searched is a function of coarsest layer num ref 1704 frames. 
Else, number of references collapsed to 1/2 1705 * @param i4_num_fpel_results : Number of full pel results to be allowed 1706 * @param i4_num_results_per_part: Number of results stored per partition 1707 * @param e_search_complexity: Decides the number of initial candts, refer 1708 * to SEARCH_COMPLEXITY_T 1709 * @param i4_use_rec_in_fpel: Whether to use input buf or recon buf in fpel 1710 * @param i4_enable_4x4_part : if encode is 0, we use 8x8 blks, if this param 1711 enabled, then we do 4x4 partial sad update 1712 * @param i4_layer_id : id of this layer (0 = finest) 1713 * @param i4_num_32x32_merge_results: number of 32x32 merged results stored 1714 * @param i4_num_64x64_merge_results: number of 64x64 merged results stored 1715 * @param i4_use_satd_cu_merge: Use SATD during CU merge 1716 * @param i4_num_steps_hpel_refine : Number of steps during hpel refinement 1717 * @param i4_num_steps_qpel_refine : Same as above but for qpel 1718 * @param i4_use_satd_subpel : Use of SATD or SAD for subpel 1719 ****************************************************************************** 1720 */ 1721 typedef struct 1722 { 1723 /* This array is used to place upper bounds on the number of search candidates */ 1724 /* that can be used per 'search cand location' */ 1725 U08 au1_num_fpel_search_cands[NUM_SEARCH_CAND_LOCATIONS]; 1726 1727 U08 u1_max_2nx2n_tu_recur_cands; 1728 1729 U08 u1_max_num_fpel_refine_centers; 1730 1731 U08 u1_max_num_subpel_refine_centers; 1732 1733 S32 i4_encode; 1734 S32 explicit_ref; 1735 S32 i4_num_ref_fpel; 1736 S32 i4_num_fpel_results; 1737 1738 S32 i4_num_results_per_part; 1739 1740 S32 i4_num_mvbank_results; 1741 SEARCH_COMPLEXITY_T e_search_complexity; 1742 S32 i4_use_rec_in_fpel; 1743 1744 S32 i4_enable_4x4_part; 1745 S32 i4_layer_id; 1746 1747 S32 i4_num_32x32_merge_results; 1748 S32 i4_num_64x64_merge_results; 1749 1750 S32 i4_use_satd_cu_merge; 1751 1752 S32 i4_num_steps_post_refine_fpel; 1753 S32 i4_num_steps_fpel_refine; 1754 S32 i4_num_steps_hpel_refine; 1755 S32 i4_num_steps_qpel_refine; 1756 S32 i4_use_satd_subpel; 1757 1758 double *pd_intra_costs; 1759 S32 bidir_enabled; 1760 S32 lambda_inp; 1761 S32 lambda_recon; 1762 S32 lambda_q_shift; 1763 1764 S32 limit_active_partitions; 1765 1766 S32 sdi_threshold; 1767 1768 U08 u1_use_lambda_derived_from_min_8x8_act_in_ctb; 1769 1770 U08 u1_max_subpel_candts; 1771 1772 U08 u1_max_subpel_candts_2Nx2N; 1773 U08 u1_max_subpel_candts_NxN; 1774 1775 U08 u1_subpel_candt_threshold; 1776 1777 /* Pointer to the array which has num best results for 1778 fpel refinement */ 1779 U08 *pu1_num_best_results; 1780 1781 } refine_prms_t; 1782 1783 /** 1784 ****************************************************************************** 1785 * @struct coarse_prms_t 1786 * @brief All the parameters passed to coarse layer search 1787 ****************************************************************************** 1788 */ 1789 typedef struct 1790 { 1791 /** ID of this layer, typically N-1 where N is tot layers */ 1792 S32 i4_layer_id; 1793 1794 /** Initial step size, valid if full search disabled */ 1795 S32 i4_start_step; 1796 1797 /** Maximum number of iterations to consider if full search disabled */ 1798 S32 i4_max_iters; 1799 1800 /** Number of reference frames to search */ 1801 S32 i4_num_ref; 1802 1803 /** Number of best results to maintain at this layer for projection */ 1804 S32 num_results; 1805 1806 /** 1807 * Enable or disable full search, if disabled then, we search around initial 1808 * candidates with early exit 1809 */ 1810 S32 
do_full_search; 1811 1812 /** Values of lambda and the Q format */ 1813 S32 lambda; 1814 S32 lambda_q_shift; 1815 1816 /** Step size for full search 2/4 */ 1817 S32 full_search_step; 1818 1819 } coarse_prms_t; 1820 1821 typedef struct 1822 { 1823 /** 1824 * These pointers point to modified input, one each for one ref idx. 1825 * Instead of weighting the reference, we weight the input with inverse 1826 * wt and offset. 1827 * +1 for storing non weighted input 1828 */ 1829 U08 *apu1_wt_inp[MAX_NUM_REF + 1]; 1830 1831 /* These are allocated once at the start of encoding */ 1832 /* These are necessary only if wt_pred is switched on */ 1833 /* Else, only a single buffer is used to store the */ 1834 /* unweighed input */ 1835 U08 *apu1_wt_inp_buf_array[MAX_NUM_REF + 1]; 1836 1837 /** Stores the weights and offsets for each ref */ 1838 S32 a_wpred_wt[MAX_NUM_REF]; 1839 S32 a_inv_wpred_wt[MAX_NUM_REF]; 1840 S32 a_wpred_off[MAX_NUM_REF]; 1841 S32 wpred_log_wdc; 1842 1843 S32 ai4_shift_val[MAX_NUM_REF]; 1844 } wgt_pred_ctxt_t; 1845 1846 /** 1847 ****************************************************************************** 1848 * @struct mv_refine_ctxt_t 1849 * @brief This structure contains important parameters used motion vector 1850 refinement 1851 ****************************************************************************** 1852 */ 1853 typedef struct 1854 { 1855 /* Added +7 in the array sizes below to make every array dimension 1856 16-byte aligned */ 1857 /** Cost of best candidate for each partition*/ 1858 MEM_ALIGN16 WORD16 i2_tot_cost[2][TOT_NUM_PARTS + 7]; 1859 1860 MEM_ALIGN16 WORD16 i2_stim_injected_cost[2][TOT_NUM_PARTS + 7]; 1861 1862 /** Motion vector cost for the best candidate of each partition*/ 1863 MEM_ALIGN16 WORD16 i2_mv_cost[2][TOT_NUM_PARTS + 7]; 1864 /** X component of the motion vector of the best candidate of each partition*/ 1865 MEM_ALIGN16 WORD16 i2_mv_x[2][TOT_NUM_PARTS + 7]; 1866 /** Y component of the motion vector of the best candidate of each partition*/ 1867 MEM_ALIGN16 WORD16 i2_mv_y[2][TOT_NUM_PARTS + 7]; 1868 /** Reference index of the best candidate of each partition*/ 1869 MEM_ALIGN16 WORD16 i2_ref_idx[2][TOT_NUM_PARTS + 7]; 1870 1871 /** Partition id for the various partitions*/ 1872 WORD32 ai4_part_id[TOT_NUM_PARTS + 1]; 1873 /** Indicates the total number of valid partitions*/ 1874 WORD32 i4_num_valid_parts; 1875 1876 /** Number of candidates to refine through*/ 1877 WORD32 i4_num_search_nodes; 1878 1879 /** Stores the satd at the end of fullpel refinement*/ 1880 WORD16 ai2_fullpel_satd[2][TOT_NUM_PARTS]; 1881 } mv_refine_ctxt_t; 1882 1883 typedef mv_refine_ctxt_t fullpel_refine_ctxt_t; 1884 typedef mv_refine_ctxt_t subpel_refine_ctxt_t; 1885 /** 1886 ****************************************************************************** 1887 * @struct hme_search_prms_t 1888 * @brief All prms going to any fpel search 1889 ****************************************************************************** 1890 */ 1891 typedef struct 1892 { 1893 /** for explicit search, indicates which ref frm to search */ 1894 /** for implicit search, indicates the prediction direction for search */ 1895 S08 i1_ref_idx; 1896 1897 /** Blk size used for search, and for which the search is done */ 1898 BLK_SIZE_T e_blk_size; 1899 1900 /** Number of init candts being searched */ 1901 S32 i4_num_init_candts; 1902 1903 S32 i4_num_steps_post_refine; 1904 1905 /** 1906 * For coarser searches, bigger refinement is done around each candt 1907 * in these cases, this prm has start step 1908 */ 1909 
S32 i4_start_step; 1910 1911 /** whether SATD to be used for srch */ 1912 S32 i4_use_satd; 1913 1914 /** if 1, we use recon frm for search (closed loop ) */ 1915 S32 i4_use_rec; 1916 1917 /** bitmask of active partitions */ 1918 S32 i4_part_mask; 1919 1920 /** x and y offset of blk w.r.t. pic start */ 1921 S32 i4_x_off; 1922 S32 i4_y_off; 1923 1924 /** 1925 * max number of iterations to search if early exit not hit 1926 * relevant only for coarser searches 1927 */ 1928 S32 i4_max_iters; 1929 1930 /** pointer to str holding all results for this blk */ 1931 search_results_t *ps_search_results; 1932 1933 /** pts to str having all search candt with refinement info */ 1934 search_candt_t *ps_search_candts; 1935 /** pts to str having valid mv range info for this blk */ 1936 range_prms_t *aps_mv_range[MAX_NUM_REF]; 1937 /** cost compute fxnptr */ 1938 PF_MV_COST_FXN pf_mv_cost_compute; 1939 1940 /** when this str is set up for full search, indicates step size for same */ 1941 S32 full_search_step; 1942 1943 /** stride ofinp buffer */ 1944 S32 i4_inp_stride; 1945 1946 /** x and y offset of cu w.r.t. ctb start, set to 0 for non enc layer */ 1947 S32 i4_cu_x_off; 1948 S32 i4_cu_y_off; 1949 1950 /** base pointer to the de-duplicated search nodes */ 1951 search_node_t *ps_search_nodes; 1952 1953 /** number of de-duplicated nodes to be searched */ 1954 S32 i4_num_search_nodes; 1955 1956 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt; 1957 1958 U32 au4_src_variance[TOT_NUM_PARTS]; 1959 1960 S32 i4_alpha_stim_multiplier; 1961 1962 U08 u1_is_cu_noisy; 1963 1964 ULWORD64 *pu8_part_src_sigmaX; 1965 ULWORD64 *pu8_part_src_sigmaXSquared; 1966 1967 } hme_search_prms_t; 1968 1969 /** 1970 ****************************************************************************** 1971 * @struct hme_err_prms_t 1972 * @brief This is input prms struct for SAD/SATD computation 1973 ****************************************************************************** 1974 */ 1975 typedef struct 1976 { 1977 /** Ptr to input blk for which err computed */ 1978 U08 *pu1_inp; 1979 1980 U16 *pu2_inp; 1981 1982 /** Ptr to ref blk after adjusting for mv and coordinates in pic */ 1983 U08 *pu1_ref; 1984 1985 U16 *pu2_ref; 1986 1987 /** Stride of input buffer */ 1988 S32 i4_inp_stride; 1989 /** Stride of ref buffer */ 1990 S32 i4_ref_stride; 1991 /** Mask of active partitions. */ 1992 S32 i4_part_mask; 1993 /** Mask of active grid pts. Refer to GRID_PT_T enum for bit posns */ 1994 S32 i4_grid_mask; 1995 /** 1996 * Pointer to SAD Grid where SADs for each partition are stored. 1997 * The layout is as follows: If there are M total partitions 1998 * and N active pts in the grid, then the first N results contain 1999 * first partition, e.g. 2Nx2N. Next N results contain 2nd partitino 2000 * sad, e.g. 2NxN_T. Totally we have MxN results. 
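     *
     * As an illustrative indexing example (the counts below are assumed for the
     * example, not taken from the encoder): with N = 5 active grid points and
     * partitions listed as {2Nx2N, 2NxN_T, 2NxN_B}, the SAD of grid point g for
     * the p-th listed partition would be read as pi4_sad_grid[p * N + g], e.g.
     * pi4_sad_grid[1 * 5 + 3] for grid point 3 of 2NxN_T.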
     * Note: The active partition count may be less than M; we still
     * have results for all M partitions
     */
    S32 *pi4_sad_grid;

    /** Pointer to TU_SPLIT grid flags */
    S32 *pi4_tu_split_flags;

    /** Pointer to the child's SATD cost */
    S32 *pi4_child_cost;

    /** Pointer to the child's TU_split flags */
    S32 *pi4_child_tu_split_flags;

    /** Pointer to the child's TU_early_cbf flags */
    S32 *pi4_child_tu_early_cbf;

    /** Pointer to TU early CBF flags */
    S32 *pi4_tu_early_cbf;

    /** Pointer to the early cbf thresholds */
    S32 *pi4_tu_early_cbf_threshold;

    /** Store the DC value */
    S32 i4_dc_val;

    /** Block width and ht of the block being evaluated for SAD */
    S32 i4_blk_wd;
    S32 i4_blk_ht;

    /**
     * Array of valid partition ids. E.g. if 2 partitions are active,
     * then there will be 3 entries, the 3rd entry being -1
     */
    S32 *pi4_valid_part_ids;

    /** Step size of the grid */
    S32 i4_step;

    /* Number of partitions */
    S32 i4_num_partitions;

    /** Store the tu_split_flag cost */
    S32 i4_tu_split_cost;

    /** The max_depth for the inter tu_tree */
    U08 u1_max_tr_depth;

    U08 u1_max_tr_size;

    /** Scratch memory for doing Hadamard */
    U08 *pu1_wkg_mem;

    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;

} err_prms_t;

typedef struct grid
{
    WORD32 num_grids; /* Number of grids to work with */
    WORD32 ref_buf_stride; /* Buffer stride of reference buffer */
    WORD32 grd_sz_y_x; /* Packed 16 bits indicating grid spacing in y & x direction <--grid-size-y--><--grid-size-x--> */
    UWORD8 **ppu1_ref_ptr; /* Center point for the grid search */
    WORD32 *pi4_grd_mask; /* Mask indicating which grid points need to be evaluated */
    hme_mv_t *p_mv; /* <--MVy--><--MVx--> */
    WORD32 *p_ref_idx; /* Ref idx to which the grid is pointing */
} grid_ctxt_t;

typedef struct cand
{
    hme_mv_t mv; /* MV corresponding to the candidate <--MVy--><--MVx--> */
    WORD32 ref_idx; /* Ref idx corresponding to the candidate */
    WORD32 grid_ix; /* Grid to which this candidate belongs */
    UWORD8 *pu1_ref_ptr; /* Pointer to the candidate */
} cand_t;
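/*
 * Illustrative sketch only, not part of the original interface: given the
 * grid mask and partition count carried by err_prms_t, this hypothetical
 * helper returns how many S32 entries the pi4_sad_grid layout documented
 * above occupies (M partitions x N active grid points, partition-major).
 */
static S32 hme_sketch_num_sad_grid_entries(S32 i4_grid_mask, S32 i4_num_partitions)
{
    S32 i4_num_grid_pts = 0;

    /* Count the grid points flagged active in the mask */
    while(i4_grid_mask)
    {
        i4_num_grid_pts += (i4_grid_mask & 1);
        i4_grid_mask >>= 1;
    }

    /* One SAD entry per (partition, grid point) pair */
    return (i4_num_partitions * i4_num_grid_pts);
}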
/**
******************************************************************************
* @struct hme_ctb_prms_t
* @brief Parameters to create the CTB list, which is a tree structure
******************************************************************************
*/
typedef struct
{
    /**
     * These parameters cover the number of input 16x16, 32x32 and 64x64 results
     * and the number of output results that are a mix of all the above CU sizes.
     * i4_num_kxk_unified_out is relevant only if we are sending multiple CU
     * sizes for the same region for RD Opt.
     */
    S32 i4_num_16x16_in;
    S32 i4_num_32x32_in;
    S32 i4_num_32x32_unified_out;
    S32 i4_num_64x64_in;
    S32 i4_num_64x64_unified_out;

    /** Pointers to results at different CU sizes */
    search_results_t *ps_search_results_16x16;
    search_results_t *ps_search_results_32x32;
    search_results_t *ps_search_results_64x64;

    S32 i4_num_part_type;

    /** Indicates whether we have split at 64x64 level */
    S32 i4_cu_64x64_split;
    /** Indicates whether each of the 32x32 CUs is split */
    S32 ai4_cu_32x32_split[4];

    /** X and y offset of the CTB */
    S32 i4_ctb_x;
    S32 i4_ctb_y;

    /**
     * Memory manager for the CTB that is responsible for node allocation
     * at a CU level
     */
    ctb_mem_mgr_t *ps_ctb_mem_mgr;

    /** Buffer manager that is responsible for memory allocation (pred bufs) */
    buf_mgr_t *ps_buf_mgr;
} hme_ctb_prms_t;

/**
******************************************************************************
* @struct result_upd_prms_t
* @brief Parameters for updating the search results
******************************************************************************
*/
typedef struct
{
    /** Cost computation function pointer */
    PF_MV_COST_FXN pf_mv_cost_compute;

    /** Points to the SAD grid updated during the SAD compute fxn */
    S32 *pi4_sad_grid;

    /** Points to the TU_SPLIT grid updated during the SATD TU REC fxn */
    S32 *pi4_tu_split_flags;

    /**
     * This is the central mv of the grid. E.g. if we have a 3x3 grid,
     * this covers the central pt's mv in the grid.
     */
    const search_node_t *ps_search_node_base;

    /** Search results structure updated by the result update fxn */
    search_results_t *ps_search_results;

    /** List of active partitions, only these are processed and updated */
    S32 *pi4_valid_part_ids;

    /** Reference id for this candt and grid */
    S08 i1_ref_idx;

    /** Mask of active pts in the grid */
    S32 i4_grid_mask;

    /**
     * For early exit reasons we may want to know the id of the least cost candt.
     * This will correspond to the id of the candt with the least cost for the
     * 2Nx2N part if multiple partitions are enabled; if only 1 part is enabled,
     * it will be the id of the candt of that partition
     */
    S32 i4_min_id;

    /** Step size of the grid */
    S32 i4_step;

    /** Mask of active partitions */
    S32 i4_part_mask;

    /** Min cost corresponding to min id */
    S32 i4_min_cost;

    /** Store the motion vectors in qpel units */
    S16 i2_mv_x;

    S16 i2_mv_y;

    U08 u1_pred_lx;

    subpel_refine_ctxt_t *ps_subpel_refine_ctxt;

    /** Current candidate in the subpel refinement process */
    search_node_t *ps_search_node;

} result_upd_prms_t;

/**
******************************************************************************
* @struct mv_grid_t
* @brief Grid of MVs storing results for a CTB and neighbours. For a CTB
*        of size 64x64, we may store up to 16x16 mvs (one for each 4x4)
*        along with 1 neighbour on each side. Valid only for encode layer
******************************************************************************
*/
typedef struct
{
    /** All the mvs in the grid */
    search_node_t as_node[NUM_MVS_IN_CTB_GRID];

    /** Stride of the grid */
    S32 i4_stride;

    /** Start offset of the 0,0 locn in the CTB.
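     * Presumably this equals CTB_MV_GRID_PAD * i4_stride + CTB_MV_GRID_PAD
     * (i.e. i4_stride + 1 with the 1-sample pad used here), so that
     * as_node[i4_start_offset] holds the MV of the top-left 4x4 of the CTB;
     * this is an inference from the padded-grid description, not a guarantee.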
*/ 2206 S32 i4_start_offset; 2207 } mv_grid_t; 2208 2209 typedef struct 2210 { 2211 /* centroid's (x, y) co-ordinates in Q8 format */ 2212 WORD32 i4_pos_x_q8; 2213 2214 WORD32 i4_pos_y_q8; 2215 } centroid_t; 2216 2217 typedef struct 2218 { 2219 S16 min_x; 2220 2221 S16 min_y; 2222 2223 S16 max_x; 2224 2225 S16 max_y; 2226 2227 /* The cumulative sum of partition sizes of the mvs */ 2228 /* in this cluster */ 2229 S16 area_in_pixels; 2230 2231 S16 uni_mv_pixel_area; 2232 2233 S16 bi_mv_pixel_area; 2234 2235 mv_data_t as_mv[128]; 2236 2237 U08 num_mvs; 2238 2239 /* Weighted average of all mvs in the cluster */ 2240 centroid_t s_centroid; 2241 2242 S08 ref_id; 2243 2244 S32 max_dist_from_centroid; 2245 2246 U08 is_valid_cluster; 2247 2248 } cluster_data_t; 2249 2250 typedef struct 2251 { 2252 cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_16x16]; 2253 2254 U08 num_clusters; 2255 2256 U08 au1_num_clusters[MAX_NUM_REF]; 2257 2258 S16 intra_mv_area; 2259 2260 S32 best_inter_cost; 2261 2262 } cluster_16x16_blk_t; 2263 2264 typedef struct 2265 { 2266 cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_32x32]; 2267 2268 U08 num_clusters; 2269 2270 U08 au1_num_clusters[MAX_NUM_REF]; 2271 2272 S16 intra_mv_area; 2273 2274 S08 best_uni_ref; 2275 2276 S08 best_alt_ref; 2277 2278 S32 best_inter_cost; 2279 2280 U08 num_refs; 2281 2282 U08 num_clusters_with_weak_sdi_density; 2283 2284 } cluster_32x32_blk_t; 2285 2286 typedef struct 2287 { 2288 cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_64x64]; 2289 2290 U08 num_clusters; 2291 2292 U08 au1_num_clusters[MAX_NUM_REF]; 2293 2294 S16 intra_mv_area; 2295 2296 S08 best_uni_ref; 2297 2298 S08 best_alt_ref; 2299 2300 S32 best_inter_cost; 2301 2302 U08 num_refs; 2303 2304 } cluster_64x64_blk_t; 2305 2306 typedef struct 2307 { 2308 cluster_16x16_blk_t *ps_16x16_blk; 2309 2310 cluster_32x32_blk_t *ps_32x32_blk; 2311 2312 cluster_64x64_blk_t *ps_64x64_blk; 2313 2314 cur_ctb_cu_tree_t *ps_cu_tree_root; 2315 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb; 2316 S32 nodes_created_in_cu_tree; 2317 2318 S32 *pi4_blk_8x8_mask; 2319 2320 S32 blk_32x32_mask; 2321 2322 S32 sdi_threshold; 2323 2324 S32 i4_frame_qstep; 2325 2326 S32 i4_frame_qstep_multiplier; 2327 2328 U08 au1_is_16x16_blk_split[16]; 2329 2330 S32 ai4_part_mask[16]; 2331 2332 } ctb_cluster_info_t; 2333 2334 /** 2335 ****************************************************************************** 2336 * @struct hme_merge_prms_t 2337 * @brief All parameters related to the merge process 2338 ****************************************************************************** 2339 */ 2340 typedef struct 2341 { 2342 /** 2343 * MV Range prms for the merged CU, this may have to be conservative 2344 * in comparison to individual CUs 2345 */ 2346 range_prms_t *aps_mv_range[MAX_NUM_REF]; 2347 2348 /** Pointers to search results of 4 children CUs to be merged */ 2349 search_results_t *ps_results_tl; 2350 search_results_t *ps_results_tr; 2351 search_results_t *ps_results_bl; 2352 search_results_t *ps_results_br; 2353 2354 search_results_t *ps_results_grandchild; 2355 2356 /** Pointer to search results of the parent CU updated during merge */ 2357 search_results_t *ps_results_merge; 2358 2359 inter_cu_results_t *ps_8x8_cu_results; 2360 2361 /** Layer related context */ 2362 layer_ctxt_t *ps_layer_ctxt; 2363 2364 inter_ctb_prms_t *ps_inter_ctb_prms; 2365 2366 /** 2367 * Points to an array of pointers. 
This array in turn points to 2368 * the active mv grid in each direction (L0/L1) 2369 */ 2370 mv_grid_t **pps_mv_grid; 2371 2372 ctb_cluster_info_t *ps_cluster_info; 2373 2374 S08 *pi1_past_list; 2375 2376 S08 *pi1_future_list; 2377 2378 /** MV cost compute function */ 2379 PF_MV_COST_FXN pf_mv_cost_compute; 2380 2381 /** If segmentation info available for the parent block */ 2382 S32 i4_seg_info_avail; 2383 2384 /** Partition mask (if segmentation info available) */ 2385 S32 i4_part_mask; 2386 2387 /** Number of input results available for the merge proc from children*/ 2388 S32 i4_num_inp_results; 2389 2390 /** Whether SATD to be used for fpel searches */ 2391 S32 i4_use_satd; 2392 2393 /** 2394 * Number of result planes valid for this merge process. For example, 2395 * for fpel search in encode layer, we may have only L0 and L1 2396 */ 2397 S32 i4_num_ref; 2398 2399 /** Whether to use input or recon frm for search */ 2400 S32 i4_use_rec; 2401 2402 /** optimized mv grid flag : indicates if same mvgrid is used for both fpel and qpel 2403 * This helps in copying fpel and qpel mv grid in pred context mv grid 2404 */ 2405 S32 i4_mv_grid_opt; 2406 2407 /** ctb size, typically 32 or 64 */ 2408 S32 log_ctb_size; 2409 2410 S32 i4_ctb_x_off; 2411 2412 S32 i4_ctb_y_off; 2413 2414 ME_QUALITY_PRESETS_T e_quality_preset; 2415 2416 S32 i4_num_pred_dir_actual; 2417 2418 U08 au1_pred_dir_searched[2]; 2419 2420 S32 i4_alpha_stim_multiplier; 2421 2422 U08 u1_is_cu_noisy; 2423 2424 } hme_merge_prms_t; 2425 2426 /** 2427 ****************************************************************************** 2428 * @struct mvbank_update_prms_t 2429 * @brief Useful prms for updating the mv bank 2430 ****************************************************************************** 2431 */ 2432 typedef struct 2433 { 2434 /** Number of references for which update to be done */ 2435 S32 i4_num_ref; 2436 2437 /** 2438 * Search blk size that was used, if this is different from the blk 2439 * size used in mv bank, then some replications or reductions may 2440 * have to be done. E.g. if search blk size is 8x8 and result blk 2441 * size is 4x4, then we have to update part NxN results to be 2442 * used for update along with replication of 2Nx2N result in each 2443 * of the 4 4x4 blk. 2444 */ 2445 BLK_SIZE_T e_search_blk_size; 2446 2447 /** 2448 * Redundant prm as it reflects differences between search blk size 2449 * and mv blk size if any 2450 */ 2451 S32 i4_shift; 2452 2453 S32 i4_num_active_ref_l0; 2454 2455 S32 i4_num_active_ref_l1; 2456 2457 S32 i4_num_results_to_store; 2458 } mvbank_update_prms_t; 2459 2460 /** 2461 ****************************************************************************** 2462 * @struct hme_subpel_prms_t 2463 * @brief input and control prms for subpel refinement 2464 ****************************************************************************** 2465 */ 2466 typedef struct 2467 { 2468 /** Relevant only for the case where we mix up results of diff cu sizes */ 2469 S32 i4_num_16x16_candts; 2470 S32 i4_num_32x32_candts; 2471 S32 i4_num_64x64_candts; 2472 2473 /** X and y offset of ctb w.r.t. start of pic */ 2474 S32 i4_ctb_x_off; 2475 S32 i4_ctb_y_off; 2476 2477 /** Max Number of diamond steps for hpel and qpel refinement */ 2478 S32 i4_num_steps_hpel_refine; 2479 S32 i4_num_steps_qpel_refine; 2480 2481 /** Whether SATD to be used or SAD to be used */ 2482 S32 i4_use_satd; 2483 2484 /** 2485 * Input ptr. 
This is updated inside the subpel refinement by picking
     * up the correct address
     */
    void *pv_inp;

    /**
     * Pred buffer ptr, updated inside the subpel refinement process. This
     * location is passed to the leaf fxn for copying the winner pred buf
     */
    U08 *pu1_pred;

    /** Interpolation fxn sent by the top layer, should exact qpel be desired */
    PF_INTERP_FXN_T pf_qpel_interp;

    /** Working mem passed to leaf fxns */
    U08 *pu1_wkg_mem;

    /** Prediction buffer stride for leaf fxns to copy the pred winner buf */
    S32 i4_pred_stride;

    /** Type of input; sizeof(UWORD8) => unidir refinement, else BIDIR */
    S32 i4_inp_type;

    /** Stride of input buf, updated inside the subpel fxn */
    S32 i4_inp_stride;

    /**
     * Pointer to the backward input. This is also updated inside
     * the subpel fxn. Needed for BIDIR refinement, where the modified input
     * is 2I - P0
     */
    S16 *pi2_inp_bck;

    /** Indicates if CU merge uses SATD / SAD */
    S32 i4_use_satd_cu_merge;

    /** Valid MV range in hpel and qpel units */
    range_prms_t *aps_mv_range_hpel[MAX_NUM_REF];
    range_prms_t *aps_mv_range_qpel[MAX_NUM_REF];

    /** Relevant only for mixed CU cases */
    search_results_t *ps_search_results_16x16;
    search_results_t *ps_search_results_32x32;
    search_results_t *ps_search_results_64x64;

    /** Cost computation fxn ptr */
    PF_MV_COST_FXN pf_mv_cost_compute;

    /** Whether BI mode is allowed for this pic (not allowed in P) */
    S32 bidir_enabled;

    /**
     * Total number of references of the current picture being encoded
     */
    U08 u1_num_ref;

    /**
     * Number of candidates used for refinement.
     * If given 1 candidate, then 2Nx2N is chosen as the best candidate
     */
    U08 u1_max_subpel_candts;

    U08 u1_subpel_candt_threshold;

    ME_QUALITY_PRESETS_T e_me_quality_presets;

    U08 u1_max_subpel_candts_2Nx2N;
    U08 u1_max_subpel_candts_NxN;

    U08 u1_max_num_subpel_refine_centers;

    subpel_refine_ctxt_t *ps_subpel_refine_ctxt;

    S32 i4_num_act_ref_l0;

    S32 i4_num_act_ref_l1;

    U08 u1_is_cu_noisy;
} hme_subpel_prms_t;

/**
******************************************************************************
* @struct layers_descr_t
* @brief One such str exists for each ref and curr input in the me ctxt.
*        Has ctxt handles for all layers of a given POC
******************************************************************************
*/
typedef struct
{
    /** Handles for all layers. Entry 0 is the finest layer */
    layer_ctxt_t *aps_layers[MAX_NUM_LAYERS];
} layers_descr_t;

/**
******************************************************************************
* @struct blk_ctb_attrs_t
* @brief The CTB is split into 16x16 blks. For each such blk, this str
*        stores the attributes of the blk w.r.t. the ctb
******************************************************************************
*/
typedef struct
{
    /**
     * ID of the blk in the full ctb. Assuming the full ctb were coded,
     * this indicates what the blk num of this blk (in encode order)
     * would be within the full ctb
     */
    U08 u1_blk_id_in_full_ctb;

    /** x and y coordinates of this blk w.r.t. the ctb base */
    U08 u1_blk_x;
    U08 u1_blk_y;

    /**
     * Mask of 8x8 blks that are active. Bits 0-3 for blks 0-3 in raster order
     * within a 16x16 blk. This will be 0xf in interiors and < 0xf at rt/bot
     * boundaries or at bot rt corners, where we may not have a full 16x16 blk
     */
    U08 u1_blk_8x8_mask;
} blk_ctb_attrs_t;
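/*
 * Illustrative sketch only (helper name and validity criterion are assumed,
 * not taken from the encoder): one way u1_blk_8x8_mask could be derived for
 * a 16x16 blk whose top-left corner is at (i4_blk_x, i4_blk_y) pels in a
 * frame of i4_wd x i4_ht pels, following the bit layout documented above.
 * E.g. a blk whose right half lies outside the frame would get 0x5.
 */
static U08 hme_sketch_blk_8x8_mask(S32 i4_blk_x, S32 i4_blk_y, S32 i4_wd, S32 i4_ht)
{
    U08 u1_mask = 0;
    S32 i;

    for(i = 0; i < 4; i++)
    {
        /* 8x8 sub-blk i sits at an x/y offset of 0 or 8 pels, raster order */
        S32 i4_x = i4_blk_x + ((i & 1) << 3);
        S32 i4_y = i4_blk_y + ((i >> 1) << 3);

        /* Treat a sub-blk as active if it starts inside the frame */
        if((i4_x < i4_wd) && (i4_y < i4_ht))
        {
            u1_mask |= (U08)(1 << i);
        }
    }

    return u1_mask;
}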
/**
******************************************************************************
* @struct ctb_boundary_attrs_t
* @brief Depending on the location of the ctb (rt boundary, bot boundary,
*        bot rt corner, elsewhere), this picks out the appropriate
*        attributes of the ctb
******************************************************************************
*/
typedef struct
{
    /**
     * 4 bit variable, one bit for each of the 4 possible 32x32s in a full ctb.
     * If any 32x32 is partially present / not present at boundaries, that
     * bit posn will be 0
     */
    U08 u1_merge_to_32x32_flag;

    /**
     * 1 bit flag indicating whether it is a complete ctb or not, and
     * consequently whether it can be merged to a full 64x64
     */
    U08 u1_merge_to_64x64_flag;

    /** Number of valid 16x16 blks (includes those partially/fully present) */
    U08 u1_num_blks_in_ctb;

    /** 16 bit variable indicating whether the corresponding 16x16 is valid */
    S32 cu_16x16_valid_flag;

    /**
     * For the 16 possible 16x16 blks in a CTB, we have one attribute str for
     * every valid blk. Tightly packed structure. For example,
     * 0  1  4  5
     * 2  3  6  7
     * 8  9  12 13
     * 10 11 14 15
     * Assuming the ctb width is only 48, blks 5,7,13,15 are invalid.
     * Then we store attributes in the order: 0,1,2,3,4,6,8,9,10,11,12,14
     */
    blk_ctb_attrs_t as_blk_attrs[16];
} ctb_boundary_attrs_t;
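/*
 * Illustrative sketch only, not part of the original interface: walking the
 * packed as_blk_attrs[] array of a ctb_boundary_attrs_t, as described above,
 * to count the valid 8x8 blks of the CTB.
 */
static S32 hme_sketch_count_active_8x8(const ctb_boundary_attrs_t *ps_attrs)
{
    S32 i4_count = 0;
    S32 i, j;

    for(i = 0; i < ps_attrs->u1_num_blks_in_ctb; i++)
    {
        U08 u1_mask = ps_attrs->as_blk_attrs[i].u1_blk_8x8_mask;

        /* Each set bit marks one valid 8x8 blk of this 16x16 blk */
        for(j = 0; j < 4; j++)
        {
            i4_count += ((u1_mask >> j) & 1);
        }
    }

    return i4_count;
}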
typedef struct
{
    S32 sdi;

    S32 ref_idx;

    S32 cluster_id;
} outlier_data_t;

/**
******************************************************************************
* @struct coarse_dyn_range_prms_t
* @brief The parameters for Dyn. Search Range in coarse ME
******************************************************************************
*/
typedef struct
{
    /* TO DO : size can be reduced, as not getting used for L0 */

    /** Dynamical Search Range parameters per layer & ref_pic */
    dyn_range_prms_t as_dyn_range_prms[MAX_NUM_LAYERS][MAX_NUM_REF];

    /** Min y value normalized per POC distance */
    WORD16 i2_dyn_min_y_per_poc[MAX_NUM_LAYERS];
    /** Max y value normalized per POC distance */
    WORD16 i2_dyn_max_y_per_poc[MAX_NUM_LAYERS];

} coarse_dyn_range_prms_t;

/**
******************************************************************************
* @struct coarse_me_ctxt_t
* @brief Handle for Coarse ME
******************************************************************************
*/
typedef struct
{
    /** Init search candts, 2 sets, one for 4x8 and one for 8x4 */
    search_node_t s_init_search_node[MAX_INIT_CANDTS * 2];

    /** For non enc layer, we search 8x8 blks and store results here */
    search_results_t s_search_results_8x8;

    /**
     * Below arrays store input planes for each ref pic.
     * These are duplications, and are present within layer ctxts, but
     * kept here together for faster indexing during search
     */
    U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];

    /** Ptr to all layer context placeholder for curr pic encoded */
    layers_descr_t *ps_curr_descr;

    /** Ptr to all layer ctxt place holder for all pics */
    layers_descr_t as_ref_descr[MAX_NUM_REF + 1 + NUM_BUFS_DECOMP_HME];

    /**
     * ME uses ref id lc to search multi ref. This TLU gets the POC of
     * the pic w.r.t. a given ref id
     */
    S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];

    /** Use this array to get disp num from ref_idx. Used for L1 traqo */
    S32 ai4_ref_idx_to_disp_num[MAX_NUM_REF];

    /** POC of pic encoded just before current */
    S32 i4_prev_poc;

    /** POC of current pic being encoded */
    S32 i4_curr_poc;

    /** Number of HME layers, encode + non encode */
    S32 num_layers;

    /** Alloc time parameter, max ref frms used for this session */
    S32 max_num_ref;

    /**
     * Number of layers that use explicit search. Explicit search means
     * that each ref id is searched separately
     */
    S32 num_layers_explicit_search;

    /**
     * Maximum number of results maintained at any refinement layer
     * search. Important from mem alloc perspective
     */
    S32 max_num_results;

    /** Same as above but for coarse layer */
    S32 max_num_results_coarse;

    /** Array of flags, one per layer, indicating whether the layer is encoded */
    U08 u1_encode[MAX_NUM_LAYERS];

    /** Init prms sent by encoder during create time */
    hme_init_prms_t s_init_prms;

    /**
     * Array look up created each frm, maintaining the corresponding
     * layer descr look up for each ref id
     */
    S32 a_ref_to_descr_id[MAX_NUM_REF];

    /**
     * Array lookup created each frame that maps a given ref id
     * pertaining to the unified list to a L0/L1 list. The encoder searches in
     * terms of the LC list, or in other words does not differentiate between
     * L0 and L1 frames for most of the search. Finally, to report results to
     * the encoder, the ref id has to be remapped to the suitable list
     */
    S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
    S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];

    /** Width and ht of each layer */
    S32 a_wd[MAX_NUM_LAYERS];
    S32 a_ht[MAX_NUM_LAYERS];

    /** Histogram, one for each ref, allocated during create time */
    mv_hist_t *aps_mv_hist[MAX_NUM_REF];

    /** Whether a given ref id in the Lc list is a past frm or future frm */
    U08 au1_is_past[MAX_NUM_REF];

    /** These are the L0 and L1 lists, storing ref id Lc in them */
    S08 ai1_past_list[MAX_NUM_REF];
    S08 ai1_future_list[MAX_NUM_REF];

    /** Number of past and future ref pics sent this frm */
    S32 num_ref_past;
    S32 num_ref_future;

    void *pv_ext_frm_prms;

    hme_frm_prms_t *ps_hme_frm_prms;

    hme_ref_map_t *ps_hme_ref_map;

    /**
     * Scale factor of any given ref lc to another ref in Q8.
     * The first MAX_NUM_REF entries are to scale an mv of ref id k
     * w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k)).
     * The next MAX_NUM_REF entries are to scale an mv of ref id 1 w.r.t.
0 2788 * And so on 2789 */ 2790 S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF]; 2791 2792 /** bits for a given ref id, in either list L0/L1 */ 2793 U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF]; 2794 2795 /** Points to above: 1 ptr for each list */ 2796 U08 *apu1_ref_bits_tlu_lc[2]; 2797 2798 /** number of b fraems between P, depends on number of hierarchy layers */ 2799 S32 num_b_frms; 2800 2801 /** Frame level qp passed every frame by ME's caller */ 2802 S32 frm_qstep; 2803 2804 /** Backup of frame parameters */ 2805 hme_frm_prms_t s_frm_prms; 2806 2807 /** Weighted prediction parameters for all references are stored 2808 * Scratch buffers for populated widgted inputs are also stored in this 2809 */ 2810 wgt_pred_ctxt_t s_wt_pred; 2811 2812 /** Weighted pred enable flag */ 2813 S32 i4_wt_pred_enable_flag; 2814 2815 /* Pointer to hold 5 rows of best search node information */ 2816 search_node_t *aps_best_search_nodes_4x8_n_rows[MAX_NUM_REF]; 2817 2818 search_node_t *aps_best_search_nodes_8x4_n_rows[MAX_NUM_REF]; 2819 2820 /* Pointer to hold 5 rows of best search node information */ 2821 S16 *api2_sads_4x4_n_rows[MAX_NUM_REF]; 2822 2823 /* Number of row buffers to store SADs and best search nodes */ 2824 S32 i4_num_row_bufs; 2825 2826 /* (HEVCE_MAX_HEIGHT>>1) assuming layer 1 is coarse layer and >>2 assuming block size is 4x4*/ 2827 S32 ai4_row_index[(HEVCE_MAX_HEIGHT >> 1) >> 2]; 2828 2829 /* store L1 cost required for rate control for enc decision*/ 2830 S32 i4_L1_hme_best_cost; 2831 2832 /* store L1 cost required for modulation index calc*/ 2833 //S32 i4_L1_hme_best_cost_for_ref; 2834 2835 /* store L1 satd */ 2836 S32 i4_L1_hme_sad; 2837 /* EIID: layer1 buffer to store the early inter intra costs and decisions */ 2838 /* pic_level pointer stored here */ 2839 ihevce_ed_blk_t *ps_ed_blk; 2840 /* EIID: layer1 buffer to store the sad/cost information for rate control 2841 or cu level qp modulation*/ 2842 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1; 2843 /** Dynamical Search Range parameters */ 2844 coarse_dyn_range_prms_t s_coarse_dyn_range_prms; 2845 2846 /** Dependency manager for Row level sync in HME pass */ 2847 void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1]; 2848 2849 /* pointer buffers for memory mapping */ 2850 UWORD8 *pu1_me_reverse_map_info; 2851 2852 /*blk count which has higher SAD*/ 2853 S32 i4_num_blks_high_sad; 2854 2855 /*num of 8x8 blocks in nearest poc*/ 2856 S32 i4_num_blks; 2857 2858 /* thread id of the current context */ 2859 WORD32 thrd_id; 2860 2861 /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */ 2862 void *pv_me_optimised_function_list; 2863 2864 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list; 2865 2866 } coarse_me_ctxt_t; 2867 2868 /** 2869 ****************************************************************************** 2870 * @struct coarse_dyn_range_prms_t 2871 * @brief The parameters for Dyn. Search Range in coarse ME 2872 ****************************************************************************** 2873 */ 2874 typedef struct 2875 { 2876 /** Dynamical Search Range parameters per ref_pic */ 2877 dyn_range_prms_t as_dyn_range_prms[MAX_NUM_REF]; 2878 2879 /** Min y value Normalized per POC distance */ 2880 WORD16 i2_dyn_min_y_per_poc; 2881 /** Max y value Normalized per POC distance */ 2882 WORD16 i2_dyn_max_y_per_poc; 2883 2884 /* The number of ref. pic. actually used in L0. 
Used to communicate */ 2885 /* to ihevce_l0_me_frame_end and frame process */ 2886 WORD32 i4_num_act_ref_in_l0; 2887 2888 /*display number*/ 2889 WORD32 i4_display_num; 2890 2891 } l0_dyn_range_prms_t; 2892 2893 /** 2894 ****************************************************************************** 2895 * @brief inter prediction (MC) context for me loop 2896 ****************************************************************************** 2897 */ 2898 /*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/ 2899 typedef struct 2900 { 2901 /** pointer to reference lists */ 2902 recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2]; 2903 2904 /** scratch buffer for horizontal interpolation destination */ 2905 WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)]; 2906 2907 /** scratch 16 bit buffer for interpolation in l0 direction */ 2908 WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE]; 2909 2910 /** scratch 16 bit buffer for interpolation in l1 direction */ 2911 WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE]; 2912 2913 /** Pointer to struct containing function pointers to 2914 functions in the 'common' library' */ 2915 func_selector_t *ps_func_selector; 2916 2917 /** common denominator used for luma weights */ 2918 WORD32 i4_log2_luma_wght_denom; 2919 2920 /** common denominator used for chroma weights */ 2921 WORD32 i4_log2_chroma_wght_denom; 2922 2923 /** offset w.r.t frame start in horz direction (pels) */ 2924 WORD32 i4_ctb_frm_pos_x; 2925 2926 /** offset w.r.t frame start in vert direction (pels) */ 2927 WORD32 i4_ctb_frm_pos_y; 2928 2929 /* Bit Depth of Input */ 2930 WORD32 i4_bit_depth; 2931 2932 /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */ 2933 UWORD8 u1_chroma_array_type; 2934 2935 /** weighted_pred_flag */ 2936 WORD8 i1_weighted_pred_flag; 2937 2938 /** weighted_bipred_flag */ 2939 WORD8 i1_weighted_bipred_flag; 2940 2941 /** Structure to describe extra CTBs around frame due to search 2942 range associated with distributed-mode. 
Entries are top, left, 2943 right and bottom */ 2944 WORD32 ai4_tile_xtra_pel[4]; 2945 2946 } inter_pred_me_ctxt_t; 2947 2948 typedef void FT_CALC_SATD_AND_RESULT(err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms); 2949 2950 typedef struct 2951 { 2952 FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_eq_1; 2953 FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_9; 2954 FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_17; 2955 FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_eq_1; 2956 FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_9; 2957 FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_17; 2958 FT_HAD_8X8_USING_4_4X4_R *pf_had_8x8_using_4_4x4_r; 2959 FT_HAD_16X16_R *pf_had_16x16_r; 2960 FT_HAD_32X32_USING_16X16 *pf_compute_32x32HAD_using_16x16; 2961 } me_func_selector_t; 2962 2963 /** 2964 ****************************************************************************** 2965 * @struct me_frm_ctxt_t 2966 * @brief Handle for ME 2967 ****************************************************************************** 2968 */ 2969 typedef struct 2970 { 2971 /** Init search candts, 2 sets, one for 4x8 and one for 8x4 */ 2972 search_node_t s_init_search_node[MAX_INIT_CANDTS]; 2973 2974 /** Motion Vectors array */ 2975 mv_t as_search_cand_mv[MAX_INIT_CANDTS]; 2976 2977 /** Results of 16 16x16 blks within a CTB used in enc layer */ 2978 search_results_t as_search_results_16x16[16]; 2979 2980 /** Results of 4 32x32 blks in a ctb for enc layer merge stage */ 2981 search_results_t as_search_results_32x32[4]; 2982 2983 /** Same as above but fo 64x64 blk */ 2984 search_results_t s_search_results_64x64; 2985 2986 /** 2987 * Below arays store input, 4 recon planes for each ref pic. 2988 * These are duplications, and are present within layer ctxts, but 2989 * kept here together for faster indexing during search 2990 */ 2991 2992 U08 *apu1_list_rec_fxfy[MAX_NUM_LAYERS][MAX_NUM_REF]; 2993 U08 *apu1_list_rec_hxfy[MAX_NUM_LAYERS][MAX_NUM_REF]; 2994 U08 *apu1_list_rec_fxhy[MAX_NUM_LAYERS][MAX_NUM_REF]; 2995 U08 *apu1_list_rec_hxhy[MAX_NUM_LAYERS][MAX_NUM_REF]; 2996 U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF]; 2997 2998 void *apv_list_dep_mngr[MAX_NUM_LAYERS][MAX_NUM_REF]; 2999 3000 /** Ptr to all layer context placeholder for curr pic encoded */ 3001 layers_descr_t *ps_curr_descr; 3002 3003 /** 3004 * ME uses ref id lc to search multi ref. This TLU gets POC of 3005 * the pic w.r.t. a given ref id 3006 */ 3007 S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF]; 3008 3009 /** POC of pic encoded just before current */ 3010 S32 i4_prev_poc; 3011 3012 /** POC of curret pic being encoded */ 3013 S32 i4_curr_poc; 3014 3015 /** Buf mgr for memory allocation */ 3016 buf_mgr_t s_buf_mgr; 3017 3018 /** MV Grid for L0 and L1, this is active one used */ 3019 mv_grid_t as_mv_grid[2]; 3020 3021 /** 3022 * MV grid for FPEL and QPEL maintained separately. Depending on the 3023 * correct prediction res. being used, copy appropriate results to 3024 * the as_mv_Grid structure 3025 */ 3026 mv_grid_t as_mv_grid_fpel[2]; 3027 mv_grid_t as_mv_grid_qpel[2]; 3028 3029 /** Number of HME layers encode + non encode */ 3030 S32 num_layers; 3031 3032 /** Alloc time parameter, max ref frms used for this session */ 3033 S32 max_num_ref; 3034 3035 /** 3036 * Number of layers that use explicit search. 
Explicit search means 3037 * that each ref id is searched separately 3038 */ 3039 S32 num_layers_explicit_search; 3040 3041 /** 3042 * Maximum number of results maintained at any refinement layer 3043 * search. Important from mem alloc perspective 3044 */ 3045 S32 max_num_results; 3046 3047 /** Same as above but for coarse layer */ 3048 S32 max_num_results_coarse; 3049 3050 /** Array of flags, one per layer indicating hwether layer is encoded */ 3051 U08 u1_encode[MAX_NUM_LAYERS]; 3052 3053 /* Parameters used for lambda computation */ 3054 frm_lambda_ctxt_t s_frm_lambda_ctxt; 3055 3056 /** 3057 * Array look up created each frm, maintaining the corresponding 3058 * layer descr look up for each ref id 3059 */ 3060 S32 a_ref_to_descr_id[MAX_NUM_REF]; 3061 3062 /** 3063 * Array lookup created each frame that maps a given ref id 3064 * pertaining to unified list to a L0/L1 list. Encoder searches in terms 3065 * of LC list or in other words does not differentiate between L0 3066 * and L1 frames for most of search. Finally to report results to 3067 * encoder, the ref id has to be remapped to suitable list 3068 */ 3069 S32 a_ref_idx_lc_to_l0[MAX_NUM_REF]; 3070 S32 a_ref_idx_lc_to_l1[MAX_NUM_REF]; 3071 3072 /** Width and ht of each layer */ 3073 S32 i4_wd; 3074 S32 i4_ht; 3075 3076 /** Histogram, one for each ref, allocated during craete time */ 3077 mv_hist_t *aps_mv_hist[MAX_NUM_REF]; 3078 3079 /** 3080 * Back input requiring > 8 bit precision, allocated during 3081 * create time, storing 2I-P0 for Bidir refinement 3082 */ 3083 S16 *pi2_inp_bck; 3084 ctb_boundary_attrs_t as_ctb_bound_attrs[NUM_CTB_BOUNDARY_TYPES]; 3085 3086 /** Whether a given ref id in Lc list is past frm or future frm */ 3087 U08 au1_is_past[MAX_NUM_REF]; 3088 3089 /** These are L0 and L1 lists, storing ref id Lc in them */ 3090 S08 ai1_past_list[MAX_NUM_REF]; 3091 S08 ai1_future_list[MAX_NUM_REF]; 3092 3093 /** Number of past and future ref pics sent this frm */ 3094 S32 num_ref_past; 3095 S32 num_ref_future; 3096 3097 /** 3098 * Passed by encoder, stored as void to avoid header file inclusion 3099 * of encoder wks into ME, these are frm prms passed by encoder, 3100 * pointers to ctbanalyse_t and cu_analyse_t structures and the 3101 * corresponding running ptrs 3102 */ 3103 3104 ctb_analyse_t *ps_ctb_analyse_base; 3105 cur_ctb_cu_tree_t *ps_cu_tree_base; 3106 me_ctb_data_t *ps_me_ctb_data_base; 3107 3108 ctb_analyse_t *ps_ctb_analyse_curr_row; 3109 cu_analyse_t *ps_cu_analyse_curr_row; 3110 cur_ctb_cu_tree_t *ps_cu_tree_curr_row; 3111 me_ctb_data_t *ps_me_ctb_data_curr_row; 3112 3113 /** Log2 of ctb size e.g. for 64 size, it will be 6 */ 3114 S32 log_ctb_size; 3115 3116 hme_frm_prms_t *ps_hme_frm_prms; 3117 3118 hme_ref_map_t *ps_hme_ref_map; 3119 3120 /** 3121 * Scale factor of any given ref lc to another ref in Q8 3122 * First MAX_NUM_REF entries are to scale an mv of ref id k 3123 * w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k)) 3124 * Next MAX_NUM_REF entreis are to scale mv of ref id 1 w.r.t. 0 3125 * And so on 3126 */ 3127 S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF]; 3128 3129 /** bits for a given ref id, in either list L0/L1 */ 3130 U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF]; 3131 3132 /** Points to above: 1 ptr for each list */ 3133 U08 *apu1_ref_bits_tlu_lc[2]; 3134 3135 /** 3136 * Frame level base pointer to L0 IPE ctb analyze structures. 3137 * This strucutres include the following 3138 * 3139 * 1. Best costs and modes at all levels of CTB (CU=8,16,32,64) 3140 * 2. 
Recommended IPE intra CU sizes for this CTB size 3141 * 3. Early intra/inter decision structures for all 8x8 blocks of CTB 3142 * populated by L1-ME and L1-IPE 3143 * 3144 */ 3145 ipe_l0_ctb_analyse_for_me_t *ps_ipe_l0_ctb_frm_base; 3146 3147 /** array of ptrs to intra cost per layer encoded, stored at 8x8 */ 3148 double *apd_intra_cost[MAX_NUM_LAYERS]; 3149 3150 /** number of b fraems between P, depends on number of hierarchy layers */ 3151 S32 num_b_frms; 3152 3153 /** Frame level qp passed every frame by ME's caller */ 3154 S32 frm_qstep; 3155 3156 /** Frame level qp with higher precision : left shifted by 8 */ 3157 S32 qstep_ls8; 3158 3159 /** Backup of frame parameters */ 3160 hme_frm_prms_t s_frm_prms; 3161 3162 /** Weighted prediction parameters for all references are stored 3163 * Scratch buffers for populated widgted inputs are also stored in this 3164 */ 3165 wgt_pred_ctxt_t s_wt_pred; 3166 3167 /** Weighted pred enable flag */ 3168 S32 i4_wt_pred_enable_flag; 3169 3170 /** Results of 16 16x16 blks within a CTB used in enc layer */ 3171 inter_cu_results_t as_cu16x16_results[16]; 3172 3173 /** Results of 4 32x32 blks in a ctb for enc layer merge stage */ 3174 inter_cu_results_t as_cu32x32_results[4]; 3175 3176 /** Same as above but fo 64x64 blk */ 3177 inter_cu_results_t s_cu64x64_results; 3178 3179 /** Results of 64 8x8 blks within a CTB used in enc layer */ 3180 inter_cu_results_t as_cu8x8_results[64]; 3181 3182 WORD32 i4_is_prev_frame_reference; 3183 3184 rc_quant_t *ps_rc_quant_ctxt; 3185 3186 /** Dynamical Search Range parameters */ 3187 l0_dyn_range_prms_t as_l0_dyn_range_prms[NUM_SG_INTERLEAVED]; 3188 3189 /** Dependency manager for Row level sync in L0 ME pass */ 3190 void *pv_dep_mngr_l0_me_sync; 3191 3192 /** Pointer to structure containing function pointers of encoder*/ 3193 me_func_selector_t *ps_func_selector; 3194 3195 cluster_16x16_blk_t *ps_blk_16x16; 3196 3197 cluster_32x32_blk_t *ps_blk_32x32; 3198 3199 cluster_64x64_blk_t *ps_blk_64x64; 3200 3201 ctb_cluster_info_t *ps_ctb_cluster_info; 3202 3203 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt; 3204 3205 /* thread id of the current context */ 3206 WORD32 thrd_id; 3207 3208 /* dependency manager for froward ME sync */ 3209 void *pv_dep_mngr_encloop_dep_me; 3210 WORD32 i4_l0me_qp_mod; 3211 3212 /*mc ctxt to reuse lume inter pred fucntion 3213 for the purpose of TRAQO*/ 3214 inter_pred_me_ctxt_t s_mc_ctxt; 3215 3216 WORD32 i4_rc_pass; 3217 /*pic type*/ 3218 WORD32 i4_pic_type; 3219 3220 WORD32 i4_temporal_layer; 3221 3222 WORD32 i4_count; 3223 3224 WORD32 i4_use_const_lamda_modifier; 3225 3226 double f_i_pic_lamda_modifier; 3227 3228 UWORD8 u1_is_curFrame_a_refFrame; 3229 3230 /* src_var related variables */ 3231 U32 au4_4x4_src_sigmaX[MAX_NUM_SIGMAS_4x4]; 3232 U32 au4_4x4_src_sigmaXSquared[MAX_NUM_SIGMAS_4x4]; 3233 } me_frm_ctxt_t; 3234 3235 /** 3236 ****************************************************************************** 3237 * @struct me_ctxt_t 3238 * @brief Handle for ME 3239 ****************************************************************************** 3240 */ 3241 typedef struct 3242 { 3243 /** Init prms send by encoder during create time */ 3244 hme_init_prms_t s_init_prms; 3245 3246 /** Not used in encoder, relevant to test bench */ 3247 U08 *pu1_debug_out; 3248 3249 void *pv_ext_frm_prms; 3250 3251 /* Frame level ME ctxt */ 3252 me_frm_ctxt_t *aps_me_frm_prms[MAX_NUM_ME_PARALLEL]; 3253 3254 /** Ptr to all layer ctxt place holder for all pics */ 3255 /** number of reference descriptors should be equal 
to max number of active references **/ 3256 layers_descr_t as_ref_descr[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1]; 3257 3258 /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */ 3259 void *pv_me_optimised_function_list; 3260 3261 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list; 3262 3263 /* Pointer to Tile params base */ 3264 void *pv_tile_params_base; 3265 3266 } me_ctxt_t; 3267 3268 typedef struct 3269 { 3270 /** array of context for each thread */ 3271 coarse_me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_PRE_ENC]; 3272 3273 /** memtabs storage memory */ 3274 hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS]; 3275 3276 /** Frame level parameters for ME */ 3277 hme_frm_prms_t s_frm_prms; 3278 3279 /** Holds all reference mapping */ 3280 hme_ref_map_t s_ref_map; 3281 3282 /** number of threads created run time */ 3283 WORD32 i4_num_proc_thrds; 3284 3285 /** Dependency manager for Row level sync in HME pass */ 3286 /* Note : Indexing should be like layer_id - 1 */ 3287 void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1]; 3288 /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */ 3289 void *pv_me_optimised_function_list; 3290 3291 ihevce_cmn_opt_func_t s_cmn_opt_func; 3292 } coarse_me_master_ctxt_t; 3293 3294 typedef struct 3295 { 3296 /** array of context for each thread */ 3297 me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC]; 3298 3299 /** memtabs storage memory */ 3300 hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS]; 3301 3302 /** Frame level parameters for ME */ 3303 hme_frm_prms_t as_frm_prms[MAX_NUM_ME_PARALLEL]; 3304 3305 /** Holds all reference mapping */ 3306 hme_ref_map_t as_ref_map[MAX_NUM_ME_PARALLEL]; 3307 3308 /** number of threads created run time */ 3309 WORD32 i4_num_proc_thrds; 3310 3311 /** number of me frames running in parallel */ 3312 WORD32 i4_num_me_frm_pllel; 3313 3314 /** Pointer to structure containing function pointers of encoder*/ 3315 me_func_selector_t s_func_selector; 3316 /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */ 3317 void *pv_me_optimised_function_list; 3318 3319 ihevce_cmn_opt_func_t s_cmn_opt_func; 3320 3321 /* Pointer to Tile params base */ 3322 void *pv_tile_params_base; 3323 3324 } me_master_ctxt_t; 3325 3326 typedef struct 3327 { 3328 S16 i2_mv_x; 3329 3330 S16 i2_mv_y; 3331 3332 U08 u1_ref_idx; 3333 3334 U32 au4_node_map[2 * MAP_Y_MAX]; 3335 3336 } subpel_dedup_enabler_t; 3337 3338 typedef subpel_dedup_enabler_t hme_dedup_enabler_t; 3339 3340 typedef struct 3341 { 3342 layer_ctxt_t *ps_curr_layer; 3343 3344 layer_ctxt_t *ps_coarse_layer; 3345 3346 U08 *pu1_num_fpel_search_cands; 3347 3348 S32 *pi4_ref_id_lc_to_l0_map; 3349 3350 S32 *pi4_ref_id_lc_to_l1_map; 3351 3352 S32 i4_pos_x; 3353 3354 S32 i4_pos_y; 3355 3356 S32 i4_num_act_ref_l0; 3357 3358 S32 i4_num_act_ref_l1; 3359 3360 search_candt_t *ps_search_cands; 3361 3362 U08 u1_search_candidate_list_index; 3363 3364 S32 i4_max_num_init_cands; 3365 3366 U08 u1_pred_dir; 3367 3368 /* Indicates the position of the current predDir in the processing order of predDir */ 3369 U08 u1_pred_dir_ctr; 3370 3371 /* The following 4 flags apply exclusively to spatial candidates */ 3372 U08 u1_is_topRight_available; 3373 3374 U08 u1_is_topLeft_available; 3375 3376 U08 u1_is_top_available; 3377 3378 U08 u1_is_left_available; 3379 3380 S08 i1_default_ref_id; 3381 3382 S08 i1_alt_default_ref_id; 3383 3384 U08 u1_num_results_in_mvbank; 3385 3386 BLK_SIZE_T e_search_blk_size; 3387 3388 } 
fpel_srch_cand_init_data_t; 3389 3390 typedef struct 3391 { 3392 U08 *pu1_pred; 3393 3394 S32 i4_pred_stride; 3395 3396 U08 u1_pred_buf_array_id; 3397 3398 } hme_pred_buf_info_t; 3399 3400 /*****************************************************************************/ 3401 /* Typedefs */ 3402 /*****************************************************************************/ 3403 typedef void (*PF_SAD_FXN_T)(err_prms_t *); 3404 3405 typedef void (*PF_SAD_RESULT_FXN_T)(err_prms_t *, result_upd_prms_t *ps_result_prms); 3406 3407 typedef WORD32 (*PF_SAD_FXN_TU_REC)( 3408 err_prms_t *, 3409 WORD32 lambda, 3410 WORD32 lamda_q_shift, 3411 WORD32 i4_frm_qstep, 3412 me_func_selector_t *ps_func_selector); 3413 3414 typedef void (*PF_RESULT_FXN_T)(result_upd_prms_t *); 3415 3416 typedef void (*PF_CALC_SAD_AND_RESULT)( 3417 hme_search_prms_t *, wgt_pred_ctxt_t *, err_prms_t *, result_upd_prms_t *, U08 **, S32); 3418 3419 #endif /* _HME_DEFS_H_ */ 3420
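/*
 * Illustrative usage note (the kernel name and variable below are
 * hypothetical, not part of this interface): a fullpel SAD kernel bound to
 * PF_SAD_FXN_T is expected to fill the pi4_sad_grid of the err_prms_t it is
 * given for all active partitions and grid points, e.g.
 *
 *     PF_SAD_FXN_T pf_sad = some_sad_kernel;
 *     pf_sad(&s_err_prms);
 *
 * while a PF_SAD_RESULT_FXN_T kernel additionally receives the
 * result_upd_prms_t used to fold those SADs into the search results.
 */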