1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_core_coding.c
25 *
26 * @brief
27 * This file contains routines that perform luma and chroma core coding for
28 * intra macroblocks
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
34 * - ih264e_pack_l_mb_i16()
35 * - ih264e_pack_c_mb_i8()
36 * - ih264e_code_luma_intra_macroblock_16x16()
37 * - ih264e_code_luma_intra_macroblock_4x4()
38 * - ih264e_code_chroma_intra_macroblock_8x8()
39 *
40 * @remarks
41 * None
42 *
43 *******************************************************************************
44 */
45
46 /*****************************************************************************/
47 /* File Includes */
48 /*****************************************************************************/
49
50 /* System include files */
51 #include <stdio.h>
52 #include <string.h>
53 #include <assert.h>
54
55 /* User include files */
56 #include "ih264e_config.h"
57 #include "ih264_typedefs.h"
58 #include "ih264_platform_macros.h"
59 #include "iv2.h"
60 #include "ive2.h"
61 #include "ih264_macros.h"
62 #include "ih264_defs.h"
63 #include "ih264e_defs.h"
64 #include "ih264_trans_data.h"
65 #include "ih264e_error.h"
66 #include "ih264e_bitstream.h"
67 #include "ime_distortion_metrics.h"
68 #include "ime_defs.h"
69 #include "ime_structs.h"
70 #include "ih264_structs.h"
71 #include "ih264_trans_quant_itrans_iquant.h"
72 #include "ih264_inter_pred_filters.h"
73 #include "ih264_mem_fns.h"
74 #include "ih264_padding.h"
75 #include "ih264_intra_pred_filters.h"
76 #include "ih264_deblk_edge_filters.h"
77 #include "ih264_cabac_tables.h"
78 #include "irc_cntrl_param.h"
79 #include "irc_frame_info_collector.h"
80 #include "ih264e_rate_control.h"
81 #include "ih264e_cabac_structs.h"
82 #include "ih264e_structs.h"
83 #include "ih264e_globals.h"
84 #include "ih264e_core_coding.h"
85 #include "ih264e_mc.h"
86
87
88 /*****************************************************************************/
89 /* Function Definitions */
90 /*****************************************************************************/
91
92 /**
93 *******************************************************************************
94 *
95 * @brief
96 * This function performs does the DCT transform then Hadamard transform
97 * and quantization for a macroblock when the mb mode is intra 16x16 mode
98 *
99 * @par Description:
100 * First cf4 is done on all 16 4x4 blocks of the 16x16 input block.
101 * Then hadamard transform is done on the DC coefficients
102 * Quantization is then performed on the 16x16 block, 4x4 wise
103 *
104 * @param[in] pu1_src
105 * Pointer to source sub-block
106 *
107 * @param[in] pu1_pred
108 * Pointer to prediction sub-block
109 *
110 * @param[in] pi2_out
111 * Pointer to residual sub-block
112 * The output will be in linear format
113 * The first 16 continuous locations will contain the values of Dc block
114 * After DC block and a stride 1st AC block will follow
115 * After one more stride next AC block will follow
116 * The blocks will be in raster scan order
117 *
118 * @param[in] src_strd
119 * Source stride
120 *
121 * @param[in] pred_strd
122 * Prediction stride
123 *
124 * @param[in] dst_strd
125 * Destination stride
126 *
127 * @param[in] pu2_scale_matrix
128 * The quantization matrix for 4x4 transform
129 *
130 * @param[in] pu2_threshold_matrix
131 * Threshold matrix
132 *
133 * @param[in] u4_qbits
134 * 15+QP/6
135 *
136 * @param[in] u4_round_factor
137 * Round factor for quant
138 *
139 * @param[out] pu1_nnz
140 * Memory to store the non-zeros after transform
141 * The first byte will be the nnz of DC block
142 * From the next byte the AC nnzs will be stored in raster scan order
143 *
144 * @param u4_dc_flag
145 * Signals if Dc transform is to be done or not
146 * 1 -> Dc transform will be done
147 * 0 -> Dc transform will not be done
148 *
149 * @remarks
150 *
151 *******************************************************************************
152 */
ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t * ps_codec,UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD16 * pi2_out,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,const UWORD16 * pu2_scale_matrix,const UWORD16 * pu2_threshold_matrix,UWORD32 u4_qbits,UWORD32 u4_round_factor,UWORD8 * pu1_nnz,UWORD32 u4_dc_flag)153 void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec,
154 UWORD8 *pu1_src,
155 UWORD8 *pu1_pred,
156 WORD16 *pi2_out,
157 WORD32 src_strd,
158 WORD32 pred_strd,
159 WORD32 dst_strd,
160 const UWORD16 *pu2_scale_matrix,
161 const UWORD16 *pu2_threshold_matrix,
162 UWORD32 u4_qbits,
163 UWORD32 u4_round_factor,
164 UWORD8 *pu1_nnz,
165 UWORD32 u4_dc_flag)
166
167 {
168 WORD32 blk_cntr;
169 WORD32 i4_offsetx, i4_offsety;
170 UWORD8 *pu1_curr_src, *pu1_curr_pred;
171
172 WORD16 *pi2_dc_str = pi2_out;
173
174 /* Move to the ac addresses */
175 pu1_nnz++;
176 pi2_out += dst_strd;
177
178 for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++)
179 {
180 IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety);
181
182 pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
183 pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
184
185 ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred,
186 pi2_out + blk_cntr * dst_strd,
187 src_strd, pred_strd, pu2_scale_matrix,
188 pu2_threshold_matrix, u4_qbits,
189 u4_round_factor, &pu1_nnz[blk_cntr],
190 &pi2_dc_str[blk_cntr]);
191
192 }
193
194 if (!u4_dc_flag)
195 return;
196
197 /*
198 * In case of i16x16, we need to remove the contribution of dc coeffs into
199 * nnz of each block. We are doing that in the packing function
200 */
201
202 /* Adjust pointers to point to dc values */
203 pi2_out -= dst_strd;
204 pu1_nnz--;
205
206 u4_qbits++;
207 u4_round_factor <<= 1;
208
209 ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix,
210 pu2_threshold_matrix, u4_qbits,
211 u4_round_factor, &pu1_nnz[0]);
212 }
213
214 /**
215 *******************************************************************************
216 *
217 * @brief
218 * This function performs the intra 16x16 inverse transform process for H264
219 * it includes inverse Dc transform, inverse quant and then inverse transform
220 *
221 * @par Description:
222 *
223 * @param[in] pi2_src
224 * Input data, 16x16 size
225 * First 16 mem locations will have the Dc coffs in rater scan order in linear fashion
226 * after a stride 1st AC clock will be present again in raster can order
227 * Then each AC block of the 16x16 block will follow in raster scan order
228 *
229 * @param[in] pu1_pred
230 * The predicted data, 16x16 size
231 * Block by block form
232 *
233 * @param[in] pu1_out
234 * Output 16x16
235 * In block by block form
236 *
237 * @param[in] src_strd
238 * Source stride
239 *
240 * @param[in] pred_strd
241 * input stride for prediction buffer
242 *
243 * @param[in] out_strd
244 * input stride for output buffer
245 *
246 * @param[in] pu2_iscale_mat
247 * Inverse quantization matrix for 4x4 transform
248 *
249 * @param[in] pu2_weigh_mat
250 * weight matrix of 4x4 transform
251 *
252 * @param[in] qp_div
253 * QP/6
254 *
255 * @param[in] pi4_tmp
256 * Input temporary buffer
257 * needs to be at least 20 in size
258 *
259 * @param[in] pu4_cntrl
260 * Controls the transform path
261 * total Last 17 bits are used
262 * the 16th th bit will correspond to DC block
263 * and 32-17 will correspond to the ac blocks in raster scan order
264 * bit equaling zero indicates that the entire 4x4 block is zero for DC
265 * For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero
266 *
267 * @param[in] pi4_tmp
268 * Input temporary buffer
269 * needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
270 *
271 * @returns
272 * none
273 *
274 * @remarks
275 * The all zero case must be taken care outside
276 *
277 *******************************************************************************
278 */
ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t * ps_codec,WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,UWORD32 u4_cntrl,UWORD32 u4_dc_trans_flag,WORD32 * pi4_tmp)279 void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec,
280 WORD16 *pi2_src,
281 UWORD8 *pu1_pred,
282 UWORD8 *pu1_out,
283 WORD32 src_strd,
284 WORD32 pred_strd,
285 WORD32 out_strd,
286 const UWORD16 *pu2_iscale_mat,
287 const UWORD16 *pu2_weigh_mat,
288 UWORD32 qp_div,
289 UWORD32 u4_cntrl,
290 UWORD32 u4_dc_trans_flag,
291 WORD32 *pi4_tmp)
292 {
293 /* Start index for inverse quant in a 4x4 block */
294 WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
295
296 /* Cntrl bits for 4x4 transforms
297 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
298 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
299 * : dc block must contain only single dc coefficient
300 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
301 * : ie not (ac or dc)
302 */
303 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
304
305 /* tmp registers for block ids */
306 UWORD32 u4_blk_id;
307
308 /* Subscrripts */
309 WORD32 i4_offset_x, i4_offset_y;
310
311 UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk;
312
313 /* Src and stride for dc coeffs */
314 UWORD32 u4_dc_inc;
315 WORD16 *pi2_dc_src;
316
317 /*
318 * For intra blocks we need to do inverse dc transform
319 * In case if intra blocks, its here that we populate the dc bits in cntrl
320 * as they cannot be populated any earlier
321 */
322 if (u4_dc_trans_flag)
323 {
324 UWORD32 cntr, u4_dc_cntrl;
325 /* Do inv hadamard and place the results at the start of each AC block */
326 ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat,
327 pu2_weigh_mat, qp_div, pi4_tmp);
328
329 /* Update the cntrl flag */
330 u4_dc_cntrl = 0;
331 for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
332 {
333 u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
334 }
335 /* Mark dc bits as 1 if corresponding ac bit is 0 */
336 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
337 /* Combine both ac and dc bits */
338 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA)
339 | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
340 }
341
342 /* Source for dc coeffs
343 * If the block is intra, we have to read dc values from first row of src
344 * then stride for each block is 1, other wise its src stride
345 */
346 pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src;
347 u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1;
348
349 /* The AC blocks starts from 2nd row */
350 pi2_src += src_strd;
351
352 /* Get the block bits */
353 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
354 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
355 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000;
356
357 /* Get first block to process */
358 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
359 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
360 {
361 /* Compute address of src blocks */
362 WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
363
364 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
365
366 /* Compute address of out and pred blocks */
367 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
368 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
369
370 /* Do inv dc transform */
371 ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset,
372 pu1_cur_prd_blk,
373 pu1_cur_out_blk, pred_strd,
374 out_strd, pu2_iscale_mat,
375 pu2_weigh_mat, qp_div, NULL,
376 iq_start_idx,
377 pi2_dc_src + i4_src_offset);
378 /* Get next DC block to process */
379 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
380 }
381
382 /* now process ac/mixed blocks */
383 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
384 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
385 {
386
387 WORD32 i4_src_offset = src_strd * u4_blk_id;
388
389 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
390
391 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
392 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
393
394 ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset,
395 pu1_cur_prd_blk, pu1_cur_out_blk,
396 pred_strd, out_strd,
397 pu2_iscale_mat, pu2_weigh_mat,
398 qp_div, (WORD16*) pi4_tmp,
399 iq_start_idx,
400 pi2_dc_src + u4_blk_id);
401
402 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
403 }
404
405 /* Now process empty blocks */
406 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
407 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
408 {
409 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
410
411 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
412 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
413
414 ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk,
415 pred_strd, out_strd, SIZE_4X4_BLK_HRZ,
416 SIZE_4X4_BLK_VERT, 0, 0);
417
418 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
419 }
420 }
421
422 /**
423 *******************************************************************************
424 *
425 * @brief
426 * This function performs does the DCT transform then Hadamard transform
427 * and quantization for a chroma macroblock
428 *
429 * @par Description:
430 * First cf4 is done on all 16 4x4 blocks of the 8x8input block
431 * Then hadamard transform is done on the DC coefficients
432 * Quantization is then performed on the 8x8 block, 4x4 wise
433 *
434 * @param[in] pu1_src
435 * Pointer to source sub-block
436 * The input is in interleaved format for two chroma planes
437 *
438 * @param[in] pu1_pred
439 * Pointer to prediction sub-block
440 * Prediction is in inter leaved format
441 *
442 * @param[in] pi2_out
443 * Pointer to residual sub-block
444 * The output will be in linear format
445 * The first 4 continuous locations will contain the values of DC block for U
446 * and then next 4 will contain for V.
447 * After DC block and a stride 1st AC block of U plane will follow
448 * After one more stride next AC block of V plane will follow
449 * The blocks will be in raster scan order
450 *
451 * After all the AC blocks of U plane AC blocks of V plane will follow in exact
452 * same way
453 *
454 * @param[in] src_strd
455 * Source stride
456 *
457 * @param[in] pred_strd
458 * Prediction stride
459 *
460 * @param[in] dst_strd
461 * Destination stride
462 *
463 * @param[in] pu2_scale_matrix
464 * The quantization matrix for 4x4 transform
465 *
466 * @param[in] pu2_threshold_matrix
467 * Threshold matrix
468 *
469 * @param[in] u4_qbits
470 * 15+QP/6
471 *
472 * @param[in] u4_round_factor
473 * Round factor for quant
474 *
475 * @param[out] pu1_nnz
476 * Memory to store the non-zeros after transform
477 * The first byte will be the nnz od DC block for U plane
478 * From the next byte the AC nnzs will be storerd in raster scan order
479 * The fifth byte will be nnz of Dc block of V plane
480 * Then Ac blocks will follow
481 *
482 * @param u4_dc_flag
483 * Signals if Dc transform is to be done or not
484 * 1 -> Dc transform will be done
485 * 0 -> Dc transform will not be done
486 *
487 * @remarks
488 *
489 *******************************************************************************
490 */
ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t * ps_codec,UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD16 * pi2_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_scale_matrix,const UWORD16 * pu2_threshold_matrix,UWORD32 u4_qbits,UWORD32 u4_round_factor,UWORD8 * pu1_nnz_c)491 void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec,
492 UWORD8 *pu1_src,
493 UWORD8 *pu1_pred,
494 WORD16 *pi2_out,
495 WORD32 src_strd,
496 WORD32 pred_strd,
497 WORD32 out_strd,
498 const UWORD16 *pu2_scale_matrix,
499 const UWORD16 *pu2_threshold_matrix,
500 UWORD32 u4_qbits,
501 UWORD32 u4_round_factor,
502 UWORD8 *pu1_nnz_c)
503 {
504 WORD32 blk_cntr;
505 WORD32 i4_offsetx, i4_offsety;
506 UWORD8 *pu1_curr_src, *pu1_curr_pred;
507
508 WORD16 pi2_dc_str[8];
509 UWORD8 au1_dcnnz[2];
510
511 /* Move to the ac addresses */
512 pu1_nnz_c++;
513 pi2_out += out_strd;
514
515 for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++)
516 {
517 IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety);
518
519 pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
520 pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
521
522 /* For chroma, v plane nnz is populated from position 5 */
523 ps_codec->pf_resi_trans_quant_chroma_4x4(
524 pu1_curr_src, pu1_curr_pred,
525 pi2_out + blk_cntr * out_strd, src_strd, pred_strd,
526 pu2_scale_matrix, pu2_threshold_matrix, u4_qbits,
527 u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)],
528 &pi2_dc_str[blk_cntr]);
529 }
530
531 /* Adjust pointers to point to dc values */
532 pi2_out -= out_strd;
533 pu1_nnz_c--;
534
535 u4_qbits++;
536 u4_round_factor <<= 1;
537
538 ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix,
539 pu2_threshold_matrix, u4_qbits,
540 u4_round_factor, au1_dcnnz);
541
542 /* Copy the dc nnzs */
543 pu1_nnz_c[0] = au1_dcnnz[0];
544 pu1_nnz_c[5] = au1_dcnnz[1];
545
546 }
547
548 /**
549 *******************************************************************************
550 * @brief
551 * This function performs the inverse transform with process for chroma MB of H264
552 *
553 * @par Description:
554 * Does inverse DC transform ,inverse quantization inverse transform
555 *
556 * @param[in] pi2_src
557 * Input data, 16x16 size
558 * The input is in the form of, first 4 locations will contain DC coeffs of
559 * U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
560 * in raster scan order will follow, each block as linear array in raster scan order.
561 * After a stride next AC block will follow. After all AC blocks of U plane
562 * V plane AC blocks will follow in exact same order.
563 *
564 * @param[in] pu1_pred
565 * The predicted data, 8x16 size, U and V interleaved
566 *
567 * @param[in] pu1_out
568 * Output 8x16, U and V interleaved
569 *
570 * @param[in] src_strd
571 * Source stride
572 *
573 * @param[in] pred_strd
574 * input stride for prediction buffer
575 *
576 * @param[in] out_strd
577 * input stride for output buffer
578 *
579 * @param[in] pu2_iscale_mat
580 * Inverse quantization martix for 4x4 transform
581 *
582 * @param[in] pu2_weigh_mat
583 * weight matrix of 4x4 transform
584 *
585 * @param[in] qp_div
586 * QP/6
587 *
588 * @param[in] pi4_tmp
589 * Input temporary buffer
590 * needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes
591 * in size
592 *
593 * @param[in] pu4_cntrl
594 * Controls the transform path
595 * the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block
596 * 32-28 bits will indicate AC blocks of U plane in raster scan order
597 * 27-23 bits will indicate AC blocks of V plane in rater scan order
598 * The bit 1 implies that there is at least one non zero coeff in a block
599 *
600 * @returns
601 * none
602 *
603 * @remarks
604 *******************************************************************************
605 */
ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t * ps_codec,WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,UWORD32 u4_cntrl,WORD32 * pi4_tmp)606 void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec,
607 WORD16 *pi2_src,
608 UWORD8 *pu1_pred,
609 UWORD8 *pu1_out,
610 WORD32 src_strd,
611 WORD32 pred_strd,
612 WORD32 out_strd,
613 const UWORD16 *pu2_iscale_mat,
614 const UWORD16 *pu2_weigh_mat,
615 UWORD32 qp_div,
616 UWORD32 u4_cntrl,
617 WORD32 *pi4_tmp)
618 {
619 /* Cntrl bits for 4x4 transforms
620 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
621 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
622 * : dc block must contain only single dc coefficient
623 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
624 * : ie not (ac or dc)
625 */
626
627 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
628
629 /* tmp registers for block ids */
630 WORD32 u4_blk_id;
631
632 /* Offsets for pointers */
633 WORD32 i4_offset_x, i4_offset_y;
634
635 /* Pointer to 4x4 blocks */
636 UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk;
637
638 /* Tmp register for pointer to dc coffs */
639 WORD16 *pi2_dc_src;
640
641 WORD16 i2_zero = 0;
642
643 /* Increment for dc block */
644 WORD32 i4_dc_inc;
645
646 /*
647 * Lets do the inverse transform for dc coeffs in chroma
648 */
649 if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
650 {
651 UWORD32 cntr, u4_dc_cntrl;
652 /* Do inv hadamard for u an v block */
653
654 ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat,
655 pu2_weigh_mat, qp_div, NULL);
656 /*
657 * Update the cntrl flag
658 * Flag is updated as follows bits 15-11 -> u block dc bits
659 */
660 u4_dc_cntrl = 0;
661 for (cntr = 0; cntr < 8; cntr++)
662 {
663 u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
664 }
665
666 /* Mark dc bits as 1 if corresponding ac bit is 0 */
667 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
668 /* Combine both ac and dc bits */
669 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA)
670 | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
671
672 /* Since we populated the dc coffs, we have to read them from there */
673 pi2_dc_src = pi2_src;
674 i4_dc_inc = 1;
675 }
676 else
677 {
678 u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
679 pi2_dc_src = &i2_zero;
680 i4_dc_inc = 0;
681 }
682
683 /* Get the block bits */
684 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
685 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
686 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000;
687
688 /* The AC blocks starts from 2nd row */
689 pi2_src += src_strd;
690
691 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
692 while (u4_blk_id < 8)
693 {
694 WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
695
696 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
697
698 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
699 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
700
701 ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc(
702 pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk,
703 pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0,
704 NULL, pi2_dc_src + dc_src_offset);
705 /* Get next DC block to process */
706 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
707 }
708
709 /* now process ac/mixed blocks */
710 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
711 while (u4_blk_id < 8)
712 {
713 WORD32 i4_src_offset = src_strd * u4_blk_id;
714 WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
715
716 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
717
718 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
719 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
720
721 ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset,
722 pu1_cur_4x4_prd_blk,
723 pu1_cur_4x4_out_blk,
724 pred_strd, out_strd,
725 pu2_iscale_mat,
726 pu2_weigh_mat, qp_div,
727 (WORD16 *) pi4_tmp,
728 pi2_dc_src + dc_src_offset);
729
730 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
731 }
732
733 /* Now process empty blocks */
734 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
735 while (u4_blk_id < 8)
736 {
737 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
738
739 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
740 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
741
742 ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk,
743 pred_strd, out_strd, SIZE_4X4_BLK_VERT,
744 SIZE_4X4_BLK_HRZ);
745
746 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
747 }
748 }
749
750 /**
751 ******************************************************************************
752 *
753 * @brief This function packs residue of an i16x16 luma mb for entropy coding
754 *
755 * @par Description
756 * An i16 macro block contains two classes of units, dc 4x4 block and
757 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and
758 * the 16 ac blocks are sent next in scan order. Each and every block is
759 * represented by 3 parameters (nnz, significant coefficient map and the
760 * residue coefficients itself). If a 4x4 unit does not have any coefficients
761 * then only nnz is sent. Inside a 4x4 block the individual coefficients are
762 * sent in scan order.
763 *
764 * The first byte of each block will be nnz of the block, if it is non zero,
765 * a 2 byte significance map is sent. This is followed by nonzero coefficients.
766 * This is repeated for 1 dc + 16 ac blocks.
767 *
768 * @param[in] pi2_res_mb
769 * pointer to residue mb
770 *
771 * @param[in, out] pv_mb_coeff_data
772 * buffer pointing to packed residue coefficients
773 *
774 * @param[in] u4_res_strd
775 * residual block stride
776 *
777 * @param[out] u1_cbp_l
778 * coded block pattern luma
779 *
780 * @param[in] pu1_nnz
781 * number of non zero coefficients in each 4x4 unit
782 *
783 * @param[out]
784 * Control signal for inverse transform of 16x16 blocks
785 *
786 * @return none
787 *
788 * @ remarks
789 *
790 ******************************************************************************
791 */
ih264e_pack_l_mb_i16(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_l,UWORD8 * pu1_nnz,UWORD32 * pu4_cntrl)792 void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb,
793 void **pv_mb_coeff_data,
794 WORD32 i4_res_strd,
795 UWORD8 *u1_cbp_l,
796 UWORD8 *pu1_nnz,
797 UWORD32 *pu4_cntrl)
798 {
799 /* pointer to packed sub block buffer space */
800 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac;
801
802 /* no of non zero coefficients in the current sub block */
803 UWORD32 u4_nnz_cnt;
804
805 /* significant coefficient map */
806 UWORD32 u4_s_map;
807
808 /* pointer to scanning matrix */
809 const UWORD8 *pu1_scan_order;
810
811 /* number of non zeros in sub block */
812 UWORD32 u4_nnz;
813
814 /* coeff scan order */
815 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
816
817 /* temp var */
818 UWORD32 coeff_cnt, mask, b4,u4_cntrl=0;
819
820 /*DC and AC coeff pointers*/
821 WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc;
822
823 /********************************************************/
824 /* pack dc coeff data for entropy coding */
825 /********************************************************/
826
827 pi2_res_mb_dc = pi2_res_mb;
828 pu1_scan_order = gu1_luma_scan_order_dc;
829
830 u4_nnz = *pu1_nnz;
831 u4_cntrl = 0;
832
833 /* write number of non zero coefficients */
834 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
835
836 if (u4_nnz)
837 {
838 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
839 {
840 if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
841 {
842 /* write residue */
843 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
844 u4_s_map |= mask;
845 }
846 mask <<= 1;
847 }
848 /* write significant coeff map */
849 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
850 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
851
852 u4_cntrl = 0x00008000;// Set DC bit in ctrl code
853 }
854 else
855 {
856 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
857 }
858
859 /********************************************************/
860 /* pack ac coeff data for entropy coding */
861 /********************************************************/
862
863 pu1_nnz ++;
864 pu1_scan_order = gu1_luma_scan_order;
865 pi2_res_mb += i4_res_strd; /*Move to AC block*/
866
867 ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
868
869 for (b4 = 0; b4 < 16; b4++)
870 {
871 ps_mb_coeff_data = (*pv_mb_coeff_data);
872
873 u4_nnz = pu1_nnz[u1_scan_order[b4]];
874
875 /* Jump according to the scan order */
876 pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
877
878 /*
879 * Since this is a i16x16 block, we should not count dc coeff on indi
880 * vidual 4x4 blocks to nnz. But due to the implementation of 16x16
881 * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
882 * here
883 */
884 u4_nnz -= (pi2_res_mb_ac[0] != 0);
885
886 /* write number of non zero coefficients */
887 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
888
889 if (u4_nnz)
890 {
891 for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
892 {
893 if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
894 {
895 /* write residue */
896 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
897 u4_s_map |= mask;
898 }
899 mask <<= 1;
900 }
901 /* write significant coeff map */
902 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
903 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
904 *u1_cbp_l = 15;
905
906 u4_cntrl |= (1 << (31 - u1_scan_order[b4]));
907 }
908 else
909 {
910 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
911 }
912
913 }
914
915 if (!(*u1_cbp_l))
916 {
917 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
918 }
919
920 /* Store the cntrl signal */
921 (*pu4_cntrl) = u4_cntrl;
922 return;
923 }
924
925 /**
926 ******************************************************************************
927 *
* @brief This function packs residue of a p16x16 luma mb for entropy coding
*
* @par Description
*  A p16x16 macro block contains a single class of units: 16 4x4 blocks.
*  There is no separate dc block (the first row of the residue buffer and the
*  first nnz entry are ignored); the 16 blocks are sent in scan order. Each
*  and every block is
*  represented by 3 parameters (nnz, significant coefficient map and the
*  residue coefficients itself). If a 4x4 unit does not have any coefficients
*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
*  sent in scan order.
*
*  The first byte of each block will be nnz of the block, if it is non zero,
*  a 2 byte significance map is sent. This is followed by nonzero coefficients.
*  This is repeated for the 16 4x4 blocks.
942 *
943 * @param[in] pi2_res_mb
944 * pointer to residue mb
945 *
946 * @param[in, out] pv_mb_coeff_data
947 * buffer pointing to packed residue coefficients
948 *
949 * @param[in] i4_res_strd
950 * residual block stride
951 *
952 * @param[out] u1_cbp_l
953 * coded block pattern luma
954 *
955 * @param[in] pu1_nnz
956 * number of non zero coefficients in each 4x4 unit
957 *
958 * @param[out] pu4_cntrl
959 * Control signal for inverse transform
960 *
961 * @return none
962 *
963 * @remarks Killing coffs not yet coded
964 *
965 ******************************************************************************
966 */
ih264e_pack_l_mb(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_l,UWORD8 * pu1_nnz,UWORD32 u4_thres_resi,UWORD32 * pu4_cntrl)967 void ih264e_pack_l_mb(WORD16 *pi2_res_mb,
968 void **pv_mb_coeff_data,
969 WORD32 i4_res_strd,
970 UWORD8 *u1_cbp_l,
971 UWORD8 *pu1_nnz,
972 UWORD32 u4_thres_resi,
973 UWORD32 *pu4_cntrl)
974 {
975 /* pointer to packed sub block buffer space */
976 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;
977
978 /* no of non zero coefficients in the current sub block */
979 UWORD32 u4_nnz_cnt;
980
981 /* significant coefficient map */
982 UWORD32 u4_s_map;
983
984 /* pointer to scanning matrix */
985 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
986
987 /* number of non zeros in sub block */
988 UWORD32 u4_nnz;
989
990 /* pointer to residual sub block */
991 WORD16 *pi2_res_sb;
992
993 /* coeff scan order */
994 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
995
996 /* coeff cost */
997 const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
998
999 /* temp var */
1000 UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;
1001
1002 /* temp var */
1003 WORD32 i4_res_val, i4_run = -1, dcac_block;
1004
1005 /* When Hadamard transform is disabled, first row values are dont care, ignore them */
1006 pi2_res_mb += i4_res_strd;
1007
1008 /* When Hadamard transform is disabled, first unit value is dont care, ignore this */
1009 pu1_nnz ++;
1010
1011 ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
1012
1013 /********************************************************/
1014 /* pack coeff data for entropy coding */
1015 /********************************************************/
1016
1017 for (b4 = 0; b4 < 16; b4++)
1018 {
1019 ps_mb_coeff_data = (*pv_mb_coeff_data);
1020
1021 b8 = b4 >> 2;
1022
1023 u4_nnz = pu1_nnz[u1_scan_order[b4]];
1024
1025 /* Jump according to the scan order */
1026 pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
1027
1028 /* write number of non zero coefficients */
1029 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1030
1031 if (u4_nnz)
1032 {
1033 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1034 {
1035 /* number of runs of zero before, this is used to compute coeff cost */
1036 i4_run++;
1037
1038 i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1039
1040 if (i4_res_val)
1041 {
1042 /* write residue */
1043 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
1044 u4_s_map |= mask;
1045
1046 if (u4_thres_resi)
1047 {
1048 /* compute coeff cost */
1049 if (i4_res_val == 1 || i4_res_val == -1)
1050 {
1051 if (i4_run < 6)
1052 u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
1053 }
1054 else
1055 u4_b8_coeff_cost += 9;
1056
1057 i4_run = -1;
1058 }
1059 }
1060
1061 mask <<= 1;
1062 }
1063
1064 /* write significant coeff map */
1065 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1066 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1067
1068 /* cbp */
1069 *u1_cbp_l |= (1 << b8);
1070
1071 /* Cntrl map for inverse transform computation
1072 *
1073 * If coeff_cnt is zero, it means that only nonzero was a dc coeff
1074 * Hence we have to set the 16 - u1_scan_order[b4]) position instead
1075 * of 31 - u1_scan_order[b4]
1076 */
1077 dcac_block = (coeff_cnt == 0)?16:31;
1078 u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4]));
1079 }
1080 else
1081 {
1082 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1083 }
1084
1085 /* Decide if the 8x8 unit has to be sent for entropy coding? */
1086 if ((b4+1) % 4 == 0)
1087 {
1088 if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
1089 (*u1_cbp_l & (1 << b8)) )
1090 {
1091
1092
1093 /*
1094 * When we want to reset the full 8x8 block, we have to reset
1095 * both the dc and ac coeff bits hence we have the symmetric
1096 * arrangement of bits
1097 */
1098 const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};
1099
1100 /* restore cbp */
1101 *u1_cbp_l = (*u1_cbp_l & (~(1 << b8)));
1102
1103 /* correct cntrl flag */
1104 u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);
1105
1106 /* correct nnz */
1107 pu1_nnz[u1_scan_order[b4 - 3]] = 0;
1108 pu1_nnz[u1_scan_order[b4 - 2]] = 0;
1109 pu1_nnz[u1_scan_order[b4 - 1]] = 0;
1110 pu1_nnz[u1_scan_order[b4]] = 0;
1111
1112 /* reset blk cost */
1113 u4_b8_coeff_cost = 0;
1114 }
1115
1116 if (!(*u1_cbp_l & (1 << b8)))
1117 {
1118 (*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
1119 }
1120
1121 u4_mb_coeff_cost += u4_b8_coeff_cost;
1122
1123 u4_b8_coeff_cost = 0;
1124 i4_run = -1;
1125 ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
1126 }
1127 }
1128
1129 if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD)
1130 && (*u1_cbp_l))
1131 {
1132 (*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
1133 *u1_cbp_l = 0;
1134 u4_cntrl = 0;
1135 memset(pu1_nnz, 0, 16);
1136 }
1137
1138 (*pu4_cntrl) = u4_cntrl;
1139
1140 return;
1141 }
1142
1143 /**
1144 ******************************************************************************
1145 *
1146 * @brief This function packs residue of an i8x8 chroma mb for entropy coding
1147 *
1148 * @par Description
1149 * An i8 chroma macro block contains two classes of units, dc 2x2 block and
1150 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and
1151 * the 4 ac blocks are sent next in scan order. Each and every block is
1152 * represented by 3 parameters (nnz, significant coefficient map and the
1153 * residue coefficients itself). If a 4x4 unit does not have any coefficients
1154 * then only nnz is sent. Inside a 4x4 block the individual coefficients are
1155 * sent in scan order.
1156 *
1157 * The first byte of each block will be nnz of the block, if it is non zero,
1158 * a 2 byte significance map is sent. This is followed by nonzero coefficients.
1159 * This is repeated for 1 dc + 4 ac blocks.
1160 *
1161 * @param[in] pi2_res_mb
1162 * pointer to residue mb
1163 *
1164 * @param[in, out] pv_mb_coeff_data
1165 * buffer pointing to packed residue coefficients
1166 *
1167 * @param[in] u4_res_strd
1168 * residual block stride
1169 *
1170 * @param[out] u1_cbp_c
1171 * coded block pattern chroma
1172 *
1173 * @param[in] pu1_nnz
1174 * number of non zero coefficients in each 4x4 unit
1175 *
1176 * @param[out] pu1_nnz
1177 * Control signal for inverse transform
1178 *
1179 * @param[in] u4_swap_uv
1180 * Swaps the order of U and V planes in entropy bitstream
1181 *
1182 * @return none
1183 *
1184 * @ remarks
1185 *
1186 ******************************************************************************
1187 */
ih264e_pack_c_mb(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_c,UWORD8 * pu1_nnz,UWORD32 u4_thres_resi,UWORD32 * pu4_cntrl,UWORD32 u4_swap_uv)1188 void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
1189 void **pv_mb_coeff_data,
1190 WORD32 i4_res_strd,
1191 UWORD8 *u1_cbp_c,
1192 UWORD8 *pu1_nnz,
1193 UWORD32 u4_thres_resi,
1194 UWORD32 *pu4_cntrl,
1195 UWORD32 u4_swap_uv)
1196 {
1197 /* pointer to packed sub block buffer space */
1198 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data);
1199 tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac;
1200
1201 /* nnz pointer */
1202 UWORD8 *pu1_nnz_ac, *pu1_nnz_dc;
1203
1204 /* nnz counter */
1205 UWORD32 u4_nnz_cnt;
1206
1207 /* significant coefficient map */
1208 UWORD32 u4_s_map;
1209
1210 /* pointer to scanning matrix */
1211 const UWORD8 *pu1_scan_order;
1212
1213 /* no of non zero coefficients in the current sub block */
1214 UWORD32 u4_nnz;
1215
1216 /* pointer to residual sub block, res val */
1217 WORD16 *pi2_res_sb, i2_res_val;
1218
1219 /* temp var */
1220 UWORD32 coeff_cnt, mask, b4,plane;
1221
1222 /* temp var */
1223 UWORD32 u4_coeff_cost;
1224 WORD32 i4_run;
1225
1226 /* coeff cost */
1227 const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
1228
1229 /* pointer to packed buffer space */
1230 UWORD32 *pu4_mb_coeff_data = NULL;
1231
1232 /* ac coded block pattern */
1233 UWORD8 u1_cbp_ac;
1234
1235 /* Variable to store the current bit pos in cntrl variable*/
1236 UWORD32 cntrl_pos = 0;
1237
1238 /********************************************************/
1239 /* pack dc coeff data for entropy coding */
1240 /********************************************************/
1241 pu1_scan_order = gu1_chroma_scan_order_dc;
1242 pi2_res_sb = pi2_res_mb;
1243 pu1_nnz_dc = pu1_nnz;
1244 (*pu4_cntrl) = 0;
1245 cntrl_pos = 15;
1246 ps_mb_coeff_data_dc = (*pv_mb_coeff_data);
1247
1248 /* Color space conversion between SP_UV and SP_VU
1249 * We always assume SP_UV for all the processing
1250 * Hence to get proper stream output we need to swap U and V channels here
1251 *
1252 * For that there are two paths we need to look for
1253 * One is the path to bitstream , these variables should have the proper input
1254 * configured UV or VU
1255 * For the other path the inverse transform variables should have what ever ordering the
1256 * input had
1257 */
1258
1259 if (u4_swap_uv)
1260 {
1261 pu1_nnz_dc += 5;/* Move to NNZ of V planve */
1262 pi2_res_sb += 4;/* Move to DC coff of V plane */
1263
1264 cntrl_pos = 14; /* Control bit for V plane */
1265 }
1266
1267 for (plane = 0; plane < 2; plane++)
1268 {
1269 ps_mb_coeff_data = (*pv_mb_coeff_data);
1270
1271 u4_nnz = *pu1_nnz_dc;
1272 /* write number of non zero coefficients U/V */
1273 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1274
1275 if (u4_nnz)
1276 {
1277 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1278 {
1279 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1280 if (i2_res_val)
1281 {
1282 /* write residue U/V */
1283 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
1284 u4_s_map |= mask;
1285 }
1286 mask <<= 1;
1287 }
1288 /* write significant coeff map U/V */
1289 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1290 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1291 *u1_cbp_c = 1;
1292
1293 (*pu4_cntrl) |= (1 << cntrl_pos);
1294 }
1295 else
1296 {
1297 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1298 }
1299
1300 if (u4_swap_uv)
1301 {
1302 cntrl_pos++; /* Control bit for U plane */
1303 pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
1304 pi2_res_sb -= 4; /* Move to DC coff of U plane */
1305
1306 }
1307 else
1308 {
1309 cntrl_pos--; /* Control bit for U plane */
1310 pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
1311 pi2_res_sb += 4; /* Move to DC coff of V plane */
1312 }
1313 }
1314
1315 /********************************************************/
1316 /* pack ac coeff data for entropy coding */
1317 /********************************************************/
1318
1319 pu1_scan_order = gu1_chroma_scan_order;
1320 ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
1321
1322 if (u4_swap_uv)
1323 {
1324 pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */
1325 cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */
1326 pu1_nnz_ac = pu1_nnz + 6;/*Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
1327 }
1328 else
1329 {
1330 pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */
1331 cntrl_pos = 31;
1332 pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */
1333 }
1334
1335 for (plane = 0; plane < 2; plane++)
1336 {
1337 pu4_mb_coeff_data = (*pv_mb_coeff_data);
1338
1339 u4_coeff_cost = 0;
1340 i4_run = -1;
1341
1342 /* get the current cbp, so that it automatically
1343 * gets reverted in case of zero ac values */
1344 u1_cbp_ac = *u1_cbp_c;
1345
1346 for (b4 = 0; b4 < 4; b4++)
1347 {
1348 ps_mb_coeff_data = (*pv_mb_coeff_data);
1349
1350 u4_nnz = *pu1_nnz_ac;
1351
1352 /*
1353 * We are scanning only ac coeffs, but the nnz is for the
1354 * complete 4x4 block. Hence we have to discount the nnz contributed
1355 * by the dc coefficient
1356 */
1357 u4_nnz -= (pi2_res_sb[0]!=0);
1358
1359 /* write number of non zero coefficients U/V */
1360 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1361
1362 if (u4_nnz)
1363 {
1364 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1365 {
1366 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1367
1368 i4_run++;
1369
1370 if (i2_res_val)
1371 {
1372 /* write residue U/V */
1373 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
1374 u4_s_map |= mask;
1375
1376 if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) )
1377 {
1378 /* compute coeff cost */
1379 if (i2_res_val == 1 || i2_res_val == -1)
1380 {
1381 if (i4_run < 6)
1382 u4_coeff_cost += pu1_coeff_cost[i4_run];
1383 }
1384 else
1385 u4_coeff_cost += 9;
1386
1387 i4_run = -1;
1388 }
1389 }
1390 mask <<= 1;
1391 }
1392
1393 /* write significant coeff map U/V */
1394 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1395 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1396 u1_cbp_ac = 2;
1397
1398 (*pu4_cntrl) |= 1 << cntrl_pos;
1399 }
1400 else
1401 {
1402 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1403 }
1404
1405 pu1_nnz_ac++;
1406 pi2_res_sb += i4_res_strd;
1407 cntrl_pos--;
1408 }
1409
1410 /* reset block */
1411 if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
1412 {
1413 pu4_mb_coeff_data[0] = 0;
1414 pu4_mb_coeff_data[1] = 0;
1415 pu4_mb_coeff_data[2] = 0;
1416 pu4_mb_coeff_data[3] = 0;
1417 (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;
1418
1419 /* Generate the control signal */
1420 /* Zero out the current plane's AC coefficients */
1421 (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF);
1422
1423 /* Similarly do for the NNZ also */
1424 *(pu1_nnz_ac - 4) = 0;
1425 *(pu1_nnz_ac - 3) = 0;
1426 *(pu1_nnz_ac - 2) = 0;
1427 *(pu1_nnz_ac - 1) = 0;
1428 }
1429 else
1430 {
1431 *u1_cbp_c = u1_cbp_ac;
1432 }
1433
1434 if (u4_swap_uv)
1435 {
1436 pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */
1437 cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */
1438 pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
1439
1440 pu1_nnz_ac = pu1_nnz + 1;
1441 }
1442 else
1443 pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */
1444 }
1445
1446 /* restore the ptr basing on cbp */
1447 if (*u1_cbp_c == 0)
1448 {
1449 (*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
1450 }
1451 else if (*u1_cbp_c == 1)
1452 {
1453 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
1454 }
1455
1456 return ;
1457 }
1458
1459 /**
1460 *******************************************************************************
1461 *
1462 * @brief performs luma core coding when intra mode is i16x16
1463 *
1464 * @par Description:
1465 * If the current mb is to be coded as intra of mb type i16x16, the mb is first
1466 * predicted using one of i16x16 prediction filters, basing on the intra mode
1467 * chosen. Then, error is computed between the input blk and the estimated blk.
1468 * This error is transformed (hierarchical transform i.e., dct followed by hada-
1469 * -mard), quantized. The quantized coefficients are packed in scan order for
1470 * entropy coding.
1471 *
1472 * @param[in] ps_proc_ctxt
1473 * pointer to the current macro block context
1474 *
1475 * @returns u1_cbp_l
1476 * coded block pattern luma
1477 *
1478 * @remarks none
1479 *
1480 *******************************************************************************
1481 */
1482
ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t * ps_proc)1483 UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc)
1484 {
1485 /* Codec Context */
1486 codec_t *ps_codec = ps_proc->ps_codec;
1487
1488 /* pointer to ref macro block */
1489 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
1490
1491 /* pointer to src macro block */
1492 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
1493
1494 /* pointer to prediction macro block */
1495 UWORD8 *pu1_pred_mb = NULL;
1496
1497 /* pointer to residual macro block */
1498 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
1499
1500 /* strides */
1501 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1502 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1503 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1504 WORD32 i4_res_strd = ps_proc->i4_res_strd;
1505
1506 /* intra mode */
1507 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1508
1509 /* coded block pattern */
1510 UWORD8 u1_cbp_l = 0;
1511
1512 /* number of non zero coeffs*/
1513 UWORD32 au4_nnz[5];
1514 UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz;
1515
1516 /*Cntrol signal for itrans*/
1517 UWORD32 u4_cntrl;
1518
1519 /* quantization parameters */
1520 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1521
1522 /* pointer to packed mb coeff data */
1523 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1524
1525 /* init nnz */
1526 au4_nnz[0] = 0;
1527 au4_nnz[1] = 0;
1528 au4_nnz[2] = 0;
1529 au4_nnz[3] = 0;
1530 au4_nnz[4] = 0;
1531
1532 if (u1_intra_mode == PLANE_I16x16)
1533 {
1534 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane;
1535 }
1536 else
1537 {
1538 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16;
1539 }
1540
1541 /********************************************************/
1542 /* error estimation, */
1543 /* transform */
1544 /* quantization */
1545 /********************************************************/
1546 ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
1547 pu1_pred_mb, pi2_res_mb,
1548 i4_src_strd, i4_pred_strd,
1549 i4_res_strd,
1550 ps_qp_params->pu2_scale_mat,
1551 ps_qp_params->pu2_thres_mat,
1552 ps_qp_params->u1_qbits,
1553 ps_qp_params->u4_dead_zone,
1554 pu1_nnz, ENABLE_DC_TRANSFORM);
1555
1556 /********************************************************/
1557 /* pack coeff data for entropy coding */
1558 /********************************************************/
1559 ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
1560 pu1_nnz, &u4_cntrl);
1561
1562 /********************************************************/
1563 /* ierror estimation, */
1564 /* itransform */
1565 /* iquantization */
1566 /********************************************************/
1567 /*
1568 *if refernce frame is not to be computed
1569 *we only need the right and bottom border 4x4 blocks to predict next intra
1570 *blocks, hence only compute them
1571 */
1572 if (!ps_proc->u4_compute_recon)
1573 {
1574 u4_cntrl &= 0x111F8000;
1575 }
1576
1577 if (u4_cntrl)
1578 {
1579 ih264e_luma_16x16_idctrans_iquant_itrans_recon(
1580 ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
1581 i4_res_strd, i4_pred_strd, i4_rec_strd,
1582 ps_qp_params->pu2_iscale_mat,
1583 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
1584 u4_cntrl, ENABLE_DC_TRANSFORM,
1585 ps_proc->pv_scratch_buff);
1586 }
1587 else
1588 {
1589 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd,
1590 i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
1591 0);
1592 }
1593
1594 return (u1_cbp_l);
1595 }
1596
1597
1598 /**
1599 *******************************************************************************
1600 *
1601 * @brief performs luma core coding when intra mode is i4x4
1602 *
1603 * @par Description:
1604 * If the current mb is to be coded as intra of mb type i4x4, the mb is first
1605 * predicted using one of i4x4 prediction filters, basing on the intra mode
1606 * chosen. Then, error is computed between the input blk and the estimated blk.
1607 * This error is dct transformed and quantized. The quantized coefficients are
1608 * packed in scan order for entropy coding.
1609 *
1610 * @param[in] ps_proc_ctxt
1611 * pointer to the current macro block context
1612 *
1613 * @returns u1_cbp_l
1614 * coded block pattern luma
1615 *
1616 * @remarks
1617 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
1618 * mentioned in h.264 specification
1619 *
1620 *******************************************************************************
1621 */
ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t * ps_proc)1622 UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc)
1623 {
1624 /* Codec Context */
1625 codec_t *ps_codec = ps_proc->ps_codec;
1626
1627 /* pointer to ref macro block */
1628 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
1629
1630 /* pointer to src macro block */
1631 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
1632
1633 /* pointer to prediction macro block */
1634 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1635
1636 /* pointer to residual macro block */
1637 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
1638
1639 /* strides */
1640 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1641 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1642 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1643
1644 /* pointer to neighbors: left, top, top-left */
1645 UWORD8 *pu1_mb_a;
1646 UWORD8 *pu1_mb_b;
1647 UWORD8 *pu1_mb_c;
1648 UWORD8 *pu1_mb_d;
1649
1650 /* intra mode */
1651 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1652
1653 /* neighbor availability */
1654 WORD32 i4_ngbr_avbl;
1655
1656 /* neighbor pels for intra prediction */
1657 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1658
1659 /* coded block pattern */
1660 UWORD8 u1_cbp_l = 0;
1661
1662 /* number of non zero coeffs*/
1663 UWORD8 u1_nnz;
1664
1665 /* quantization parameters */
1666 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1667
1668 /* pointer to packed mb coeff data */
1669 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1670
1671 /* pointer to packed mb coeff data */
1672 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1673
1674 /* no of non zero coefficients in the current sub block */
1675 UWORD32 u4_nnz_cnt;
1676
1677 /* significant coefficient map */
1678 UWORD32 u4_s_map;
1679
1680 /* pointer to scanning matrix */
1681 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1682
1683 /*Dummy variable for 4x4 trans fucntion*/
1684 WORD16 i2_dc_dummy;
1685
1686 /* temp var */
1687 UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
1688
1689 /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1690 for (b8 = 0; b8 < 4; b8++)
1691 {
1692 u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
1693 u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
1694
1695 /* if in case cbp for the 8x8 block is zero, send no residue */
1696 ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1697
1698 for (b4 = 0; b4 < 4; b4++)
1699 {
1700 /* index of pel in MB */
1701 u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
1702 u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
1703
1704 /* Initialize source and reference pointers */
1705 pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd);
1706 pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd);
1707
1708 /* pointer to left of ref macro block */
1709 pu1_mb_a = pu1_ref_mb - 1;
1710 /* pointer to top of ref macro block */
1711 pu1_mb_b = pu1_ref_mb - i4_rec_strd;
1712 /* pointer to topright of ref macro block */
1713 pu1_mb_c = pu1_mb_b + 4;
1714 /* pointer to topleft macro block */
1715 pu1_mb_d = pu1_mb_b - 1;
1716
1717 /* compute neighbor availability */
1718 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1719
1720 /* sub block intra mode */
1721 u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
1722
1723 /********************************************************/
1724 /* gather prediction pels from neighbors for prediction */
1725 /********************************************************/
1726 /* left pels */
1727 if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
1728 {
1729 for (i = 0; i < 4; i++)
1730 pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
1731 }
1732 else
1733 {
1734 memset(pu1_ngbr_pels_i4, 0, 4);
1735 }
1736
1737 /* top pels */
1738 if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1739 {
1740 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1741 }
1742 else
1743 {
1744 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
1745 }
1746 /* top left pels */
1747 if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
1748 {
1749 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1750 }
1751 else
1752 {
1753 pu1_ngbr_pels_i4[4] = 0;
1754 }
1755 /* top right pels */
1756 if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
1757 {
1758 memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4);
1759 }
1760 else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1761 {
1762 memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4);
1763 }
1764
1765 /********************************************************/
1766 /* prediction */
1767 /********************************************************/
1768 (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4,
1769 pu1_pred_mb, 0,
1770 i4_pred_strd,
1771 i4_ngbr_avbl);
1772
1773 /********************************************************/
1774 /* error estimation, */
1775 /* transform */
1776 /* quantization */
1777 /********************************************************/
1778 ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb,
1779 pi2_res_mb, i4_src_strd,
1780 i4_pred_strd,
1781 ps_qp_params->pu2_scale_mat,
1782 ps_qp_params->pu2_thres_mat,
1783 ps_qp_params->u1_qbits,
1784 ps_qp_params->u4_dead_zone,
1785 &u1_nnz, &i2_dc_dummy);
1786
1787 /********************************************************/
1788 /* pack coeff data for entropy coding */
1789 /********************************************************/
1790 ps_mb_coeff_data = *pv_mb_coeff_data;
1791
1792 /* write number of non zero coefficients */
1793 ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
1794
1795 if (u1_nnz)
1796 {
1797 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++)
1798 {
1799 if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
1800 {
1801 /* write residue */
1802 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
1803 u4_s_map |= mask;
1804 }
1805 mask <<= 1;
1806 }
1807 /* write significant coeff map */
1808 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1809
1810 /* update ptr to coeff data */
1811 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1812
1813 /* cbp */
1814 u1_cbp_l |= (1 << b8);
1815 }
1816 else
1817 {
1818 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1819 }
1820
1821 /********************************************************/
1822 /* ierror estimation, */
1823 /* itransform */
1824 /* iquantization */
1825 /********************************************************/
1826 if (u1_nnz)
1827 ps_codec->pf_iquant_itrans_recon_4x4(
1828 pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
1829 /*No input stride,*/i4_pred_strd,
1830 i4_rec_strd, ps_qp_params->pu2_iscale_mat,
1831 ps_qp_params->pu2_weigh_mat,
1832 ps_qp_params->u1_qp_div,
1833 ps_proc->pv_scratch_buff, 0, 0);
1834 else
1835 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb,
1836 i4_pred_strd, i4_rec_strd,
1837 BLK_SIZE, BLK_SIZE, NULL,
1838 0);
1839
1840 }
1841
1842 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1843 if (!(u1_cbp_l & (1 << b8)))
1844 {
1845 *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1846 }
1847 }
1848
1849 return (u1_cbp_l);
1850 }
1851
1852 /**
1853 *******************************************************************************
1854 *
1855 * @brief performs luma core coding when intra mode is i4x4
1856 *
1857 * @par Description:
1858 * If the current mb is to be coded as intra of mb type i4x4, the mb is first
1859 * predicted using one of i4x4 prediction filters, basing on the intra mode
1860 * chosen. Then, error is computed between the input blk and the estimated blk.
1861 * This error is dct transformed and quantized. The quantized coefficients are
1862 * packed in scan order for entropy coding.
1863 *
1864 * @param[in] ps_proc_ctxt
1865 * pointer to the current macro block context
1866 *
1867 * @returns u1_cbp_l
1868 * coded block pattern luma
1869 *
1870 * @remarks
1871 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
1872 * mentioned in h.264 specification
1873 *
1874 *******************************************************************************
1875 */
ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t * ps_proc)1876 UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
1877 {
1878 /* Codec Context */
1879 codec_t *ps_codec = ps_proc->ps_codec;
1880
1881 /* pointer to ref macro block */
1882 UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4;
1883
1884 /* pointer to recon buffer */
1885 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
1886
1887 /* pointer to residual macro block */
1888 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1889
1890 /* strides */
1891 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1892
1893 /* number of non zero coeffs*/
1894 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
1895
1896 /* coded block pattern */
1897 UWORD8 u1_cbp_l = 0;
1898
1899 /* pointer to packed mb coeff data */
1900 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1901
1902 /* pointer to packed mb coeff data */
1903 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1904
1905 /* no of non zero coefficients in the current sub block */
1906 UWORD32 u4_nnz_cnt;
1907
1908 /* significant coefficient map */
1909 UWORD32 u4_s_map;
1910
1911 /* pointer to scanning matrix */
1912 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1913
1914 /* temp var */
1915 UWORD32 b8, b4, coeff_cnt, mask;
1916
1917 /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1918 for (b8 = 0; b8 < 4; b8++)
1919 {
1920 /* if in case cbp for the 8x8 block is zero, send no residue */
1921 ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1922
1923 for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
1924 {
1925 /********************************************************/
1926 /* pack coeff data for entropy coding */
1927 /********************************************************/
1928 ps_mb_coeff_data = *pv_mb_coeff_data;
1929
1930 /* write number of non zero coefficients */
1931 ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz;
1932
1933 if (*pu1_nnz)
1934 {
1935 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++)
1936 {
1937 if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
1938 {
1939 /* write residue */
1940 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
1941 u4_s_map |= mask;
1942 }
1943 mask <<= 1;
1944 }
1945 /* write significant coeff map */
1946 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1947
1948 /* update ptr to coeff data */
1949 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1950
1951 /* cbp */
1952 u1_cbp_l |= (1 << b8);
1953 }
1954 else
1955 {
1956 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1957 }
1958 }
1959
1960 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1961 if (!(u1_cbp_l & (1 << b8)))
1962 {
1963 *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1964 }
1965 }
1966
1967 /* memcpy recon */
1968 ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
1969
1970 return (u1_cbp_l);
1971 }
1972
1973
1974 /**
1975 *******************************************************************************
1976 *
1977 * @brief performs chroma core coding for intra macro blocks
1978 *
1979 * @par Description:
1980 * If the current MB is to be intra coded with mb type chroma I8x8, the MB is
1981 * first predicted using intra 8x8 prediction filters. The predicted data is
1982 * compared with the input for error and the error is transformed. The DC
1983 * coefficients of each transformed sub blocks are further transformed using
1984 * Hadamard transform. The resulting coefficients are quantized, packed and sent
1985 * for entropy coding.
1986 *
1987 * @param[in] ps_proc_ctxt
1988 * pointer to the current macro block context
1989 *
1990 * @returns u1_cbp_c
1991 * coded block pattern chroma
1992 *
1993 * @remarks
1994 * The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
1995 * mentioned in h.264 specification
1996 *
1997 *******************************************************************************
1998 */
ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t * ps_proc)1999 UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
2000 {
2001 /* Codec Context */
2002 codec_t *ps_codec = ps_proc->ps_codec;
2003
2004 /* pointer to ref macro block */
2005 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
2006
2007 /* pointer to src macro block */
2008 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
2009
2010 /* pointer to prediction macro block */
2011 UWORD8 *pu1_pred_mb = NULL;
2012
2013 /* pointer to residual macro block */
2014 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2015
2016 /* strides */
2017 WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
2018 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2019 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2020 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2021
2022 /* intra mode */
2023 UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
2024
2025 /* coded block pattern */
2026 UWORD8 u1_cbp_c = 0;
2027
2028 /* number of non zero coeffs*/
2029 UWORD8 au1_nnz[18] = {0};
2030
2031 /* quantization parameters */
2032 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2033
2034 /* Control signal for inverse transform */
2035 UWORD32 u4_cntrl;
2036
2037 /* pointer to packed mb coeff data */
2038 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2039
2040 /* See if we need to swap U and V plances for entropy */
2041 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2042
2043 if (PLANE_CH_I8x8 == u1_intra_mode)
2044 {
2045 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
2046 }
2047 else
2048 {
2049 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
2050 }
2051
2052 /********************************************************/
2053 /* error estimation, */
2054 /* transform */
2055 /* quantization */
2056 /********************************************************/
2057 ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2058 pu1_pred_mb, pi2_res_mb,
2059 i4_src_strd, i4_pred_strd,
2060 i4_res_strd,
2061 ps_qp_params->pu2_scale_mat,
2062 ps_qp_params->pu2_thres_mat,
2063 ps_qp_params->u1_qbits,
2064 ps_qp_params->u4_dead_zone,
2065 au1_nnz);
2066
2067 /********************************************************/
2068 /* pack coeff data for entropy coding */
2069 /********************************************************/
2070 ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
2071 au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2072
2073 /********************************************************/
2074 /* ierror estimation, */
2075 /* itransform */
2076 /* iquantization */
2077 /********************************************************/
2078 ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb,
2079 pu1_pred_mb, pu1_ref_mb,
2080 i4_res_strd, i4_pred_strd,
2081 i4_rec_strd,
2082 ps_qp_params->pu2_iscale_mat,
2083 ps_qp_params->pu2_weigh_mat,
2084 ps_qp_params->u1_qp_div,
2085 u4_cntrl,
2086 ps_proc->pv_scratch_buff);
2087 return (u1_cbp_c);
2088 }
2089
2090
2091 /**
2092 *******************************************************************************
2093 *
2094 * @brief performs luma core coding when mode is inter
2095 *
2096 * @par Description:
2097 * If the current mb is to be coded as inter the mb is predicted based on the
2098 * sub mb partitions and corresponding motion vectors generated by ME. Then,
2099 * error is computed between the input blk and the estimated blk. This error is
2100 * transformed, quantized. The quantized coefficients are packed in scan order
2101 * for entropy coding
2102 *
* @param[in] ps_proc
2104 * pointer to the current macro block context
2105 *
2106 * @returns u1_cbp_l
2107 * coded block pattern luma
2108 *
2109 * @remarks none
2110 *
2111 *******************************************************************************
2112 */
2113
ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t * ps_proc)2114 UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
2115 {
2116 /* Codec Context */
2117 codec_t *ps_codec = ps_proc->ps_codec;
2118
2119 /* pointer to ref macro block */
2120 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
2121
2122 /* pointer to src macro block */
2123 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
2124
2125 /* pointer to prediction macro block */
2126 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
2127
2128 /* pointer to residual macro block */
2129 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2130
2131 /* strides */
2132 WORD32 i4_src_strd = ps_proc->i4_src_strd;
2133 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2134 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2135 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2136
2137 /* coded block pattern */
2138 UWORD8 u1_cbp_l = 0;
2139
2140 /*Control signal of itrans*/
2141 UWORD32 u4_cntrl;
2142
2143 /* number of non zero coeffs*/
2144 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz;
2145
2146 /* quantization parameters */
2147 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
2148
2149 /* pointer to packed mb coeff data */
2150 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2151
2152 /* pseudo pred buffer */
2153 UWORD8 *pu1_pseudo_pred = pu1_pred_mb;
2154
2155 /* pseudo pred buffer stride */
2156 WORD32 i4_pseudo_pred_strd = i4_pred_strd;
2157
2158 /* init nnz */
2159 ps_proc->au4_nnz[0] = 0;
2160 ps_proc->au4_nnz[1] = 0;
2161 ps_proc->au4_nnz[2] = 0;
2162 ps_proc->au4_nnz[3] = 0;
2163 ps_proc->au4_nnz[4] = 0;
2164
2165 /********************************************************/
2166 /* prediction */
2167 /********************************************************/
2168 ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd);
2169
2170 /********************************************************/
2171 /* error estimation, */
2172 /* transform */
2173 /* quantization */
2174 /********************************************************/
2175 if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0)
2176 {
2177 ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2178 pu1_pseudo_pred, pi2_res_mb,
2179 i4_src_strd,
2180 i4_pseudo_pred_strd,
2181 i4_res_strd,
2182 ps_qp_params->pu2_scale_mat,
2183 ps_qp_params->pu2_thres_mat,
2184 ps_qp_params->u1_qbits,
2185 ps_qp_params->u4_dead_zone,
2186 pu1_nnz,
2187 DISABLE_DC_TRANSFORM);
2188
2189 /********************************************************/
2190 /* pack coeff data for entropy coding */
2191 /********************************************************/
2192 ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
2193 pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl);
2194 }
2195 else
2196 {
2197 u1_cbp_l = 0;
2198 u4_cntrl = 0;
2199 }
2200
2201 /********************************************************/
2202 /* ierror estimation, */
2203 /* itransform */
2204 /* iquantization */
2205 /********************************************************/
2206
2207 /*If the frame is not to be used for P frame reference or dumping recon
2208 * we only will use the reocn for only predicting intra Mbs
2209 * THis will need only right and bottom edge 4x4 blocks recon
2210 * Hence we selectively enable them using control signal(including DC)
2211 */
2212 if (ps_proc->u4_compute_recon != 1)
2213 {
2214 u4_cntrl &= 0x111F0000;
2215 }
2216
2217 if (u4_cntrl)
2218 {
2219 ih264e_luma_16x16_idctrans_iquant_itrans_recon(
2220 ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb,
2221 i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd,
2222 ps_qp_params->pu2_iscale_mat,
2223 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
2224 u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM,
2225 ps_proc->pv_scratch_buff);
2226 }
2227 else
2228 {
2229 ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb,
2230 i4_pseudo_pred_strd, i4_rec_strd,
2231 MB_SIZE, MB_SIZE, NULL, 0);
2232 }
2233
2234
2235 return (u1_cbp_l);
2236 }
2237
2238 /**
2239 *******************************************************************************
2240 *
2241 * @brief performs chroma core coding for inter macro blocks
2242 *
2243 * @par Description:
* If the current mb is to be coded as an inter predicted mb, prediction is done
* based on the sub mb partitions and the corresponding motion vectors generated
* by ME. Then, error is computed between the input blk and the estimated blk.
* This error is transformed and quantized. The quantized coefficients are
* packed in scan order for entropy coding.
2250 *
* @param[in] ps_proc
2252 * pointer to the current macro block context
2253 *
* @returns u1_cbp_c
2255 * coded block pattern chroma
2256 *
2257 * @remarks none
2258 *
2259 *******************************************************************************
2260 */
ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t * ps_proc)2261 UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
2262 {
2263 /* Codec Context */
2264 codec_t *ps_codec = ps_proc->ps_codec;
2265
2266 /* pointer to ref macro block */
2267 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma;
2268
2269 /* pointer to src macro block */
2270 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
2271
2272 /* pointer to prediction macro block */
2273 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
2274
2275 /* pointer to residual macro block */
2276 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2277
2278 /* strides */
2279 WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
2280 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2281 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2282 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2283
2284 /* coded block pattern */
2285 UWORD8 u1_cbp_c = 0;
2286
2287 /*Control signal for inverse transform*/
2288 UWORD32 u4_cntrl;
2289
2290 /* number of non zero coeffs*/
2291 UWORD8 au1_nnz[10] = {0};
2292
2293 /* quantization parameters */
2294 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2295
2296 /* pointer to packed mb coeff data */
2297 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2298
2299 /*See if we need to swap U and V plances for entropy*/
2300 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2301
2302 /********************************************************/
2303 /* prediction */
2304 /********************************************************/
2305 ih264e_motion_comp_chroma(ps_proc);
2306
2307 /********************************************************/
2308 /* error estimation, */
2309 /* transform */
2310 /* quantization */
2311 /********************************************************/
2312 ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2313 pu1_pred_mb, pi2_res_mb,
2314 i4_src_strd, i4_pred_strd,
2315 i4_res_strd,
2316 ps_qp_params->pu2_scale_mat,
2317 ps_qp_params->pu2_thres_mat,
2318 ps_qp_params->u1_qbits,
2319 ps_qp_params->u4_dead_zone,
2320 au1_nnz);
2321
2322 /********************************************************/
2323 /* pack coeff data for entropy coding */
2324 /********************************************************/
2325 ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
2326 au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2327
2328 /********************************************************/
2329 /* ierror estimation, */
2330 /* itransform */
2331 /* iquantization */
2332 /********************************************************/
2333
2334 /* If the frame is not to be used for P frame reference or dumping recon
2335 * we only will use the reocn for only predicting intra Mbs
2336 * THis will need only right and bottom edge 4x4 blocks recon
2337 * Hence we selectively enable them using control signal(including DC)
2338 */
2339 if (!ps_proc->u4_compute_recon)
2340 {
2341 u4_cntrl &= 0x7700C000;
2342 }
2343
2344 if (u4_cntrl)
2345 {
2346 ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
2347 ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb,
2348 i4_res_strd, i4_pred_strd, i4_rec_strd,
2349 ps_qp_params->pu2_iscale_mat,
2350 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
2351 u4_cntrl, ps_proc->pv_scratch_buff);
2352 }
2353 else
2354 {
2355 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd,
2356 i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
2357 NULL, 0);
2358 }
2359
2360 return (u1_cbp_c);
2361 }
2362