1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ****************************************************************************** 23 * @file 24 * ih264e_core_coding.h 25 * 26 * @brief 27 * This file contains extern declarations of core coding routines 28 * 29 * @author 30 * ittiam 31 * 32 * @remarks 33 * none 34 ****************************************************************************** 35 */ 36 37 #ifndef IH264E_CORE_CODING_H_ 38 #define IH264E_CORE_CODING_H_ 39 40 /*****************************************************************************/ 41 /* Constant Macros */ 42 /*****************************************************************************/ 43 44 /** 45 ****************************************************************************** 46 * @brief Enable/Disable Hadamard transform of DC Coeff's 47 ****************************************************************************** 48 */ 49 #define DISABLE_DC_TRANSFORM 0 50 #define ENABLE_DC_TRANSFORM 1 51 52 /** 53 ******************************************************************************* 54 * @brief bit masks for DC and AC control flags 55 ******************************************************************************* 56 */ 57 58 #define DC_COEFF_CNT_LUMA_MB 16 59 #define NUM_4X4_BLKS_LUMA_MB_ROW 4 60 #define NUM_LUMA4x4_BLOCKS_IN_MB 16 61 #define NUM_CHROMA4x4_BLOCKS_IN_MB 8 62 63 #define SIZE_4X4_BLK_HRZ TRANS_SIZE_4 64 #define SIZE_4X4_BLK_VERT TRANS_SIZE_4 65 66 #define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF 67 #define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000 68 69 #define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000 70 #define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000 71 72 #define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000 73 #define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00 74 75 #define CNTRL_FLAG_AC_MASK_CHROMA ( CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V ) 76 #define CNTRL_FLAG_DC_MASK_CHROMA ( CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V ) 77 78 #define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000 79 80 /** 81 ******************************************************************************* 82 * @brief macros for transforms 83 ******************************************************************************* 84 */ 85 #define DEQUEUE_BLKID_FROM_CONTROL( u4_cntrl, blk_lin_id) \ 86 { \ 87 blk_lin_id = CLZ(u4_cntrl); \ 88 u4_cntrl &= (0x7FFFFFFF >> blk_lin_id); \ 89 }; 90 91 #define IND2SUB_LUMA_MB(u4_blk_id,i4_offset_x,i4_offset_y) \ 92 { \ 93 i4_offset_x = (u4_blk_id % 4) << 2; \ 94 i4_offset_y = (u4_blk_id / 4) << 2; \ 95 } 96 97 #define IND2SUB_CHROMA_MB(u4_blk_id,i4_offset_x,i4_offset_y) \ 98 { \ 99 i4_offset_x = ((u4_blk_id & 0x1 ) << 3) + (u4_blk_id > 3); \ 100 i4_offset_y = (u4_blk_id & 0x2) << 1; \ 101 } 102 103 104 /*****************************************************************************/ 105 /* Function Declarations */ 106 /*****************************************************************************/ 107 108 /** 109 ******************************************************************************* 110 * 111 * @brief 112 * This function performs does the DCT transform then Hadamard transform 113 * and quantization for a macroblock when the mb mode is intra 16x16 mode 114 * 115 * @par Description: 116 * First cf4 is done on all 16 4x4 blocks of the 16x16 input block. 117 * Then hadamard transform is done on the DC coefficients 118 * Quantization is then performed on the 16x16 block, 4x4 wise 119 * 120 * @param[in] pu1_src 121 * Pointer to source sub-block 122 * 123 * @param[in] pu1_pred 124 * Pointer to prediction sub-block 125 * 126 * @param[in] pi2_out 127 * Pointer to residual sub-block 128 * The output will be in linear format 129 * The first 16 continuous locations will contain the values of Dc block 130 * After DC block and a stride 1st AC block will follow 131 * After one more stride next AC block will follow 132 * The blocks will be in raster scan order 133 * 134 * @param[in] src_strd 135 * Source stride 136 * 137 * @param[in] pred_strd 138 * Prediction stride 139 * 140 * @param[in] dst_strd 141 * Destination stride 142 * 143 * @param[in] pu2_scale_matrix 144 * The quantization matrix for 4x4 transform 145 * 146 * @param[in] pu2_threshold_matrix 147 * Threshold matrix 148 * 149 * @param[in] u4_qbits 150 * 15+QP/6 151 * 152 * @param[in] u4_round_factor 153 * Round factor for quant 154 * 155 * @param[out] pu1_nnz 156 * Memory to store the non-zeros after transform 157 * The first byte will be the nnz of DC block 158 * From the next byte the AC nnzs will be stored in raster scan order 159 * 160 * @param u4_dc_flag 161 * Signals if Dc transform is to be done or not 162 * 1 -> Dc transform will be done 163 * 0 -> Dc transform will not be done 164 * 165 * @remarks 166 * 167 ******************************************************************************* 168 */ 169 void ih264e_luma_16x16_resi_trans_dctrans_quant( 170 codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred, 171 WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, 172 WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, 173 const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, 174 UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag); 175 176 /** 177 ******************************************************************************* 178 * 179 * @brief 180 * This function performs the intra 16x16 inverse transform process for H264 181 * it includes inverse Dc transform, inverse quant and then inverse transform 182 * 183 * @par Description: 184 * 185 * @param[in] pi2_src 186 * Input data, 16x16 size 187 * First 16 mem locations will have the Dc coffs in rater scan order in linear fashion 188 * after a stride 1st AC clock will be present again in raster can order 189 * Then each AC block of the 16x16 block will follow in raster scan order 190 * 191 * @param[in] pu1_pred 192 * The predicted data, 16x16 size 193 * Block by block form 194 * 195 * @param[in] pu1_out 196 * Output 16x16 197 * In block by block form 198 * 199 * @param[in] src_strd 200 * Source stride 201 * 202 * @param[in] pred_strd 203 * input stride for prediction buffer 204 * 205 * @param[in] out_strd 206 * input stride for output buffer 207 * 208 * @param[in] pu2_iscale_mat 209 * Inverse quantization matrix for 4x4 transform 210 * 211 * @param[in] pu2_weigh_mat 212 * weight matrix of 4x4 transform 213 * 214 * @param[in] qp_div 215 * QP/6 216 * 217 * @param[in] pi4_tmp 218 * Input temporary buffer 219 * needs to be at least 20 in size 220 * 221 * @param[in] pu4_cntrl 222 * Controls the transform path 223 * total Last 17 bits are used 224 * the 16th th bit will correspond to DC block 225 * and 32-17 will correspond to the ac blocks in raster scan order 226 * bit equaling zero indicates that the entire 4x4 block is zero for DC 227 * For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero 228 * 229 * @param[in] pi4_tmp 230 * Input temporary buffer 231 * needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size 232 * 233 * @returns 234 * none 235 * 236 * @remarks 237 * The all zero case must be taken care outside 238 * 239 ******************************************************************************* 240 */ 241 void ih264e_luma_16x16_idctrans_iquant_itrans_recon( 242 codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred, 243 UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd, 244 WORD32 out_strd, const UWORD16 *pu2_iscale_mat, 245 const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl, 246 UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp); 247 248 /** 249 ******************************************************************************* 250 * 251 * @brief 252 * This function performs does the DCT transform then Hadamard transform 253 * and quantization for a chroma macroblock 254 * 255 * @par Description: 256 * First cf4 is done on all 16 4x4 blocks of the 8x8input block 257 * Then hadamard transform is done on the DC coefficients 258 * Quantization is then performed on the 8x8 block, 4x4 wise 259 * 260 * @param[in] pu1_src 261 * Pointer to source sub-block 262 * The input is in interleaved format for two chroma planes 263 * 264 * @param[in] pu1_pred 265 * Pointer to prediction sub-block 266 * Prediction is in inter leaved format 267 * 268 * @param[in] pi2_out 269 * Pointer to residual sub-block 270 * The output will be in linear format 271 * The first 4 continuous locations will contain the values of DC block for U 272 * and then next 4 will contain for V. 273 * After DC block and a stride 1st AC block of U plane will follow 274 * After one more stride next AC block of V plane will follow 275 * The blocks will be in raster scan order 276 * 277 * After all the AC blocks of U plane AC blocks of V plane will follow in exact 278 * same way 279 * 280 * @param[in] src_strd 281 * Source stride 282 * 283 * @param[in] pred_strd 284 * Prediction stride 285 * 286 * @param[in] dst_strd 287 * Destination stride 288 * 289 * @param[in] pu2_scale_matrix 290 * The quantization matrix for 4x4 transform 291 * 292 * @param[in] pu2_threshold_matrix 293 * Threshold matrix 294 * 295 * @param[in] u4_qbits 296 * 15+QP/6 297 * 298 * @param[in] u4_round_factor 299 * Round factor for quant 300 * 301 * @param[out] pu1_nnz 302 * Memory to store the non-zeros after transform 303 * The first byte will be the nnz od DC block for U plane 304 * From the next byte the AC nnzs will be storerd in raster scan order 305 * The fifth byte will be nnz of Dc block of V plane 306 * Then Ac blocks will follow 307 * 308 * @param u4_dc_flag 309 * Signals if Dc transform is to be done or not 310 * 1 -> Dc transform will be done 311 * 0 -> Dc transform will not be done 312 * 313 * @remarks 314 * 315 ******************************************************************************* 316 */ 317 void ih264e_chroma_8x8_resi_trans_dctrans_quant( 318 codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred, 319 WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, 320 WORD32 out_strd, const UWORD16 *pu2_scale_matrix, 321 const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, 322 UWORD32 u4_round_factor, UWORD8 *pu1_nnz_c); 323 324 /** 325 ******************************************************************************* 326 * @brief 327 * This function performs the inverse transform with process for chroma MB of H264 328 * 329 * @par Description: 330 * Does inverse DC transform ,inverse quantization inverse transform 331 * 332 * @param[in] pi2_src 333 * Input data, 16x16 size 334 * The input is in the form of, first 4 locations will contain DC coeffs of 335 * U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane 336 * in raster scan order will follow, each block as linear array in raster scan order. 337 * After a stride next AC block will follow. After all AC blocks of U plane 338 * V plane AC blocks will follow in exact same order. 339 * 340 * @param[in] pu1_pred 341 * The predicted data, 8x16 size, U and V interleaved 342 * 343 * @param[in] pu1_out 344 * Output 8x16, U and V interleaved 345 * 346 * @param[in] src_strd 347 * Source stride 348 * 349 * @param[in] pred_strd 350 * input stride for prediction buffer 351 * 352 * @param[in] out_strd 353 * input stride for output buffer 354 * 355 * @param[in] pu2_iscale_mat 356 * Inverse quantization martix for 4x4 transform 357 * 358 * @param[in] pu2_weigh_mat 359 * weight matrix of 4x4 transform 360 * 361 * @param[in] qp_div 362 * QP/6 363 * 364 * @param[in] pi4_tmp 365 * Input temporary buffer 366 * needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes 367 * in size 368 * 369 * @param[in] pu4_cntrl 370 * Controls the transform path 371 * the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block 372 * 32-28 bits will indicate AC blocks of U plane in raster scan order 373 * 27-23 bits will indicate AC blocks of V plane in rater scan order 374 * The bit 1 implies that there is at least one non zero coff in a block 375 * 376 * @returns 377 * none 378 * 379 * @remarks 380 ******************************************************************************* 381 */ 382 void ih264e_chroma_8x8_idctrans_iquant_itrans_recon( 383 codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred, 384 UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd, 385 WORD32 out_strd, const UWORD16 *pu2_iscale_mat, 386 const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl, 387 WORD32 *pi4_tmp); 388 389 /** 390 ****************************************************************************** 391 * 392 * @brief This function packs residue of an i16x16 luma mb for entropy coding 393 * 394 * @par Description 395 * An i16 macro block contains two classes of units, dc 4x4 block and 396 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and 397 * the 16 ac blocks are sent next in scan order. Each and every block is 398 * represented by 3 parameters (nnz, significant coefficient map and the 399 * residue coefficients itself). If a 4x4 unit does not have any coefficients 400 * then only nnz is sent. Inside a 4x4 block the individual coefficients are 401 * sent in scan order. 402 * 403 * The first byte of each block will be nnz of the block, if it is non zero, 404 * a 2 byte significance map is sent. This is followed by nonzero coefficients. 405 * This is repeated for 1 dc + 16 ac blocks. 406 * 407 * @param[in] pi2_res_mb 408 * pointer to residue mb 409 * 410 * @param[in, out] pv_mb_coeff_data 411 * buffer pointing to packed residue coefficients 412 * 413 * @param[in] u4_res_strd 414 * residual block stride 415 * 416 * @param[out] u1_cbp_l 417 * coded block pattern luma 418 * 419 * @param[in] pu1_nnz 420 * number of non zero coefficients in each 4x4 unit 421 * 422 * @param[out] 423 * Control signal for inverse transform of 16x16 blocks 424 * 425 * @return none 426 * 427 * @ remarks 428 * 429 ****************************************************************************** 430 */ 431 void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, 432 WORD32 i4_res_strd, UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz, 433 UWORD32 *pu4_cntrl); 434 435 /** 436 ****************************************************************************** 437 * 438 * @brief This function packs residue of an i8x8 chroma mb for entropy coding 439 * 440 * @par Description 441 * An i8 chroma macro block contains two classes of units, dc 2x2 block and 442 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and 443 * the 4 ac blocks are sent next in scan order. Each and every block is 444 * represented by 3 parameters (nnz, significant coefficient map and the 445 * residue coefficients itself). If a 4x4 unit does not have any coefficients 446 * then only nnz is sent. Inside a 4x4 block the individual coefficients are 447 * sent in scan order. 448 * 449 * The first byte of each block will be nnz of the block, if it is non zero, 450 * a 2 byte significance map is sent. This is followed by nonzero coefficients. 451 * This is repeated for 1 dc + 4 ac blocks. 452 * 453 * @param[in] pi2_res_mb 454 * pointer to residue mb 455 * 456 * @param[in, out] pv_mb_coeff_data 457 * buffer pointing to packed residue coefficients 458 * 459 * @param[in] u4_res_strd 460 * residual block stride 461 * 462 * @param[out] u1_cbp_c 463 * coded block pattern chroma 464 * 465 * @param[in] pu1_nnz 466 * number of non zero coefficients in each 4x4 unit 467 * 468 * @param[out] pu1_nnz 469 * Control signal for inverse transform 470 * 471 * @param[in] u4_swap_uv 472 * Swaps the order of U and V planes in entropy bitstream 473 * 474 * @return none 475 * 476 * @ remarks 477 * 478 ****************************************************************************** 479 */ 480 void ih264e_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, 481 WORD32 i4_res_strd, UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz, 482 UWORD32 u4_kill_coffs_flag, UWORD32 *pu4_cntrl, 483 UWORD32 u4_swap_uv); 484 485 /** 486 ******************************************************************************* 487 * 488 * @brief performs luma core coding when intra mode is i16x16 489 * 490 * @par Description: 491 * If the current mb is to be coded as intra of mb type i16x16, the mb is first 492 * predicted using one of i16x16 prediction filters, basing on the intra mode 493 * chosen. Then, error is computed between the input blk and the estimated blk. 494 * This error is transformed (hierarchical transform i.e., dct followed by hada- 495 * -mard), quantized. The quantized coefficients are packed in scan order for 496 * entropy coding. 497 * 498 * @param[in] ps_proc_ctxt 499 * pointer to the current macro block context 500 * 501 * @returns u1_cbp_l 502 * coded block pattern luma 503 * 504 * @remarks none 505 * 506 ******************************************************************************* 507 */ 508 UWORD8 ih264e_code_luma_intra_macroblock_16x16 509 ( 510 process_ctxt_t *ps_proc 511 ); 512 513 /** 514 ******************************************************************************* 515 * 516 * @brief performs luma core coding when intra mode is i4x4 517 * 518 * @par Description: 519 * If the current mb is to be coded as intra of mb type i4x4, the mb is first 520 * predicted using one of i4x4 prediction filters, basing on the intra mode 521 * chosen. Then, error is computed between the input blk and the estimated blk. 522 * This error is dct transformed and quantized. The quantized coefficients are 523 * packed in scan order for entropy coding. 524 * 525 * @param[in] ps_proc_ctxt 526 * pointer to the current macro block context 527 * 528 * @returns u1_cbp_l 529 * coded block pattern luma 530 * 531 * @remarks 532 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order 533 * mentioned in h.264 specification 534 * 535 ******************************************************************************* 536 */ 537 UWORD8 ih264e_code_luma_intra_macroblock_4x4 538 ( 539 process_ctxt_t *ps_proc 540 ); 541 542 /** 543 ******************************************************************************* 544 * 545 * @brief performs luma core coding when intra mode is i4x4 546 * 547 * @par Description: 548 * If the current mb is to be coded as intra of mb type i4x4, the mb is first 549 * predicted using one of i4x4 prediction filters, basing on the intra mode 550 * chosen. Then, error is computed between the input blk and the estimated blk. 551 * This error is dct transformed and quantized. The quantized coefficients are 552 * packed in scan order for entropy coding. 553 * 554 * @param[in] ps_proc_ctxt 555 * pointer to the current macro block context 556 * 557 * @returns u1_cbp_l 558 * coded block pattern luma 559 * 560 * @remarks 561 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order 562 * mentioned in h.264 specification 563 * 564 ******************************************************************************* 565 */ 566 UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on 567 ( 568 process_ctxt_t *ps_proc 569 ); 570 571 /** 572 ******************************************************************************* 573 * 574 * @brief performs chroma core coding for intra macro blocks 575 * 576 * @par Description: 577 * If the current MB is to be intra coded with mb type chroma I8x8, the MB is 578 * first predicted using intra 8x8 prediction filters. The predicted data is 579 * compared with the input for error and the error is transformed. The DC 580 * coefficients of each transformed sub blocks are further transformed using 581 * Hadamard transform. The resulting coefficients are quantized, packed and sent 582 * for entropy coding. 583 * 584 * @param[in] ps_proc_ctxt 585 * pointer to the current macro block context 586 * 587 * @returns u1_cbp_c 588 * coded block pattern chroma 589 * 590 * @remarks 591 * The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order 592 * mentioned in h.264 specification 593 * 594 ******************************************************************************* 595 */ 596 UWORD8 ih264e_code_chroma_intra_macroblock_8x8 597 ( 598 process_ctxt_t *ps_proc 599 ); 600 601 /** 602 ******************************************************************************* 603 * @brief performs luma core coding when mode is inter 604 * 605 * @par Description: 606 * If the current mb is to be coded as inter predicted mb,based on the sub mb 607 * partitions and corresponding motion vectors generated by ME, prediction is done. 608 * Then, error is computed between the input blk and the estimated blk. 609 * This error is transformed ( dct and with out hadamard), quantized. The 610 * quantized coefficients are packed in scan order for entropy coding. 611 * 612 * @param[in] ps_proc_ctxt 613 * pointer to the current macro block context 614 * 615 * @returns u1_cbp_l 616 * coded block pattern luma 617 * 618 * @remarks none 619 * 620 ******************************************************************************* 621 */ 622 UWORD8 ih264e_code_luma_inter_macroblock_16x16 623 ( 624 process_ctxt_t *ps_proc 625 ); 626 627 /** 628 ******************************************************************************* 629 * @brief performs chroma core coding for inter macro blocks 630 * 631 * @par Description: 632 * If the current mb is to be coded as inter predicted mb, based on the sub mb 633 * partitions and corresponding motion vectors generated by ME, prediction is done. 634 * Then, error is computed between the input blk and the estimated blk. 635 * This error is transformed, quantized. The quantized coefficients 636 * are packed in scan order for entropy coding. 637 * 638 * @param[in] ps_proc_ctxt 639 * pointer to the current macro block context 640 * 641 * @returns u1_cbp_l 642 * coded block pattern luma 643 * 644 * @remarks none 645 * 646 ******************************************************************************* 647 */ 648 UWORD8 ih264e_code_chroma_inter_macroblock_8x8 649 ( 650 process_ctxt_t *ps_proc 651 ); 652 653 #endif /* IH264E_CORE_CODING_H_ */ 654