/* ------------------------------------------------------------------
 * Copyright (C) 1998-2009 PacketVideo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * -------------------------------------------------------------------
 */
#include "mp4def.h"
#include "mp4lib_int.h"
#include "mp4enc_lib.h"
#include "dct.h"
#include "m4venc_oscl.h"

/* ======================================================================== */
/*  Function : CodeMB_H263( )                                               */
/*  Date     : 8/15/2001                                                    */
/*  Purpose  : Code the six 8x8 blocks (4 luma, then U, then V) of the      */
/*             current macroblock: pick a DCT size from the block SAD,      */
/*             run DCT and H.263 quant/dequant, then IDCT + motion          */
/*             compensation into the reconstructed frame.                   */
/*             Modified from FastCodeMB().                                  */
/*  Input    :                                                              */
/*      video       Video encoder data structure.                           */
/*      function    Unused (kept for interface compatibility).              */
/*      QP          Packed value: bits 0..4 are the quantizer of this MB,   */
/*                  the remaining upper bits (QP >> 5) are the luma byte    */
/*                  offset of the MB from the frame origin.                 */
/*      ncoefblck   Out: per block, the number of coefficients the VLC      */
/*                  stage has to scan (0, 1, 6, 26 or 64).                  */
/*  Output   :                                                              */
/*      video->outputMB               Quantized DCT coefficients.           */
/*      currVop->yChan,uChan,vChan    Reconstructed pixels.                 */
/*      video->headerInfo.CBP[mbnum]  Coded block pattern, one bit per      */
/*                                    block, block 0 in the MSB.            */
/*  Return   : PV_SUCCESS (always).                                         */
/*  Modified :                                                              */
/*      2/26/01 - modified threshold based on correlation coeff 0.75,       */
/*                only for mode H.263                                       */
/*              - ncoefblck[] as input, to keep position of last non-zero   */
/*                coeff                                                     */
/*      8/10/01 - modified threshold based on correlation coeff 0.5         */
/*              - used column threshold to speed up column DCT              */
/*              - used bitmap zigzag to speed up RunLevel()                 */
/* ======================================================================== */
PV_STATUS CodeMB_H263(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
{
    Int sad, k, CBP, mbnum = video->mbnum;
    Short *output, *dataBlock;
    UChar Mode = video->headerInfo.Mode[mbnum];
    UChar *bitmapcol, *bitmaprow = video->bitmaprow;
    UInt  *bitmapzz;
    UChar shortHeader = video->vol[video->currLayer]->shortVideoHeader;
    Int dc_scaler = 8;
    Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
    struct QPstruct QuantParam;
    Int dctMode, DctTh1;
    Int ColTh;
    Int(*BlockQuantDequantH263)(Short *, Short *, struct QPstruct *,
                                UChar[], UChar *, UInt *, Int, Int, Int, UChar);
    Int(*BlockQuantDequantH263DC)(Short *, Short *, struct QPstruct *,
                                  UChar *, UInt *, Int, UChar);
    void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);

    /* motion comp. related var. */
    Vop *currVop = video->currVop;
    VideoEncFrameIO *inputFrame = video->input;
    Int ind_x = video->outputMB->mb_x;
    Int ind_y = video->outputMB->mb_y;
    Int lx = currVop->pitch;
    Int width = currVop->width;
    UChar *rec, *input, *pred;
    Int offset = QP >> 5;  /* QP is combined offset and QP */
    Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
    /*****************************/

    OSCL_UNUSED_ARG(function);

    output = video->outputMB->block[0];
    CBP = 0;
    QP = QP & 0x1F;     /* drop the offset bits, keep the quantizer */
    /* NOTE: the coefficient buffer is not zeroed here; the
     * M4VENC_MEMSET(output, 0, (sizeof(Short) << 6) * 6) that used to do it
     * was commented out on 7/24/01. */

    /* Quantizer-derived constants consumed by the quant/dequant routines. */
    QuantParam.QPx2 = QP << 1;
    QuantParam.QP = QP;
    QuantParam.QPdiv2 = QP >> 1;
    QuantParam.QPx2plus = QuantParam.QPx2 + QuantParam.QPdiv2;
    QuantParam.Addition = QP - 1 + (QP & 0x1);

    if (intra)
    {
        BlockDCT1x1 = &Block1x1DCTIntra;
        BlockDCT2x2 = &Block2x2DCT_AANIntra;
        BlockDCT4x4 = &Block4x4DCT_AANIntra;
        BlockDCT8x8 = &BlockDCT_AANIntra;
        BlockQuantDequantH263 = &BlockQuantDequantH263Intra;
        BlockQuantDequantH263DC = &BlockQuantDequantH263DCIntra;
        if (shortHeader)
        {
            dc_scaler = 8;
        }
        else
        {
            dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
        }
        DctTh1 = (Int)(dc_scaler * 3); /* was *1.829 */
        ColTh = ColThIntra[QP];
    }
    else
    {
        BlockDCT1x1 = &Block1x1DCTwSub;
        BlockDCT2x2 = &Block2x2DCT_AANwSub;
        BlockDCT4x4 = &Block4x4DCT_AANwSub;
        BlockDCT8x8 = &BlockDCT_AANwSub;

        BlockQuantDequantH263 = &BlockQuantDequantH263Inter;
        BlockQuantDequantH263DC = &BlockQuantDequantH263DCInter;
        ColTh = ColThInter[QP];
        DctTh1 = (Int)(16 * QP);  /* was 9*QP */
    }

    rec = currVop->yChan + offset;
    input = inputFrame->yChan + offset;
    if (lx != width) input -= (ind_y << 9);  /* non-padded offset */

    dataBlock = video->dataBlock;
    pred = video->predictedMB;

    for (k = 0; k < 6; k++)
    {
        CBP <<= 1;
        bitmapcol = video->bitmapcol[k];
        bitmapzz = video->bitmapzz[k];  /*  7/30/01 */
        if (k < 4)
        {
            /* Luma: the SAD comes from the motion data of this MB. */
            sad = video->mot[mbnum][k+1].sad;
            if (k & 1)
            {
                /* right-hand block of the current block row */
                rec += 8;
                input += 8;
            }
            else if (k == 2)
            {
                /* lower-left block: down 8 rows, back 8 columns */
                input += (width << 3) - 8;
                rec += (lx << 3) - 8;
            }
        }
        else
        {
            if (k == 4)
            {
                rec = currVop->uChan + offsetc;
                input = inputFrame->uChan + offsetc;
                if (lx != width) input -= (ind_y << 7);
                /* From here on lx and width are the chroma strides. */
                lx >>= 1;
                width >>= 1;
                if (intra)
                {
                    sad = getBlockSum(input, width);
                    if (shortHeader)
                        dc_scaler = 8;
                    else
                    {
                        dc_scaler = cal_dc_scalerENC(QP, 2); /* chrominance blocks */
                    }
                    DctTh1 = (Int)(dc_scaler * 3); /* was *1.829 */
                }
                else
                    sad = Sad8x8(input, pred, width);
            }
            else
            {
                rec = currVop->vChan + offsetc;
                input = inputFrame->vChan + offsetc;
                if (lx != width) input -= (ind_y << 7);
                if (intra)
                {
                    sad = getBlockSum(input, width);
                }
                else
                    sad = Sad8x8(input, pred, width);
            }
        }

        if (sad < DctTh1 && !(shortHeader && intra)) /* all-zero */
        {
            /* For shortHeader intra block, DC value cannot be zero */
            dctMode = 0;
            /* the CBP bit of this block stays 0 */
            ncoefblck[k] = 0;
        }
        else if (sad < 18*QP /* was (QP<<4) */) /* DC-only */
        {
            dctMode = 1;
            BlockDCT1x1(dataBlock, input, pred, width);

            CBP |= (*BlockQuantDequantH263DC)(dataBlock, output, &QuantParam,
                                              bitmaprow + k, bitmapzz, dc_scaler, shortHeader);
            ncoefblck[k] = 1;
        }
        else
        {
            /* Column threshold is handed over in the slot just past the
             * 64 coefficients. */
            dataBlock[64] = ColTh;

            if (sad < 22*QP /* was (QP<<4)+(QP<<1) */)  /* 2x2 DCT */
            {
                dctMode = 2;
                BlockDCT2x2(dataBlock, input, pred, width);
                ncoefblck[k] = 6;
            }
            else if (sad < (QP << 5)) /* 4x4 DCT */
            {
                dctMode = 4;
                BlockDCT4x4(dataBlock, input, pred, width);
                ncoefblck[k] = 26;
            }
            else /* Full-DCT */
            {
                dctMode = 8;
                BlockDCT8x8(dataBlock, input, pred, width);
                ncoefblck[k] = 64;
            }

            CBP |= (*BlockQuantDequantH263)(dataBlock, output, &QuantParam,
                                            bitmapcol, bitmaprow + k, bitmapzz,
                                            dctMode, k, dc_scaler, shortHeader);
        }

        /* Reconstruct the block; the last argument packs twice the
         * reconstruction stride with the intra flag in bit 0. */
        BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | intra);

        output += 64;
        /* Step through the prediction buffer: +8 to the right-hand block,
         * +120 (8 rows of 16 bytes, minus 8) to the next block row. */
        if (!(k & 1))
        {
            pred += 8;
        }
        else
        {
            pred += 120;
        }
    }

    video->headerInfo.CBP[mbnum] = CBP; /*  5/18/2001 */
    return PV_SUCCESS;
}

#ifndef NO_MPEG_QUANT
/* ======================================================================== */
/*  Function : CodeMB_MPEG( )                                               */
/*  Date     : 8/15/2001                                                    */
/*  Purpose  : Same job as CodeMB_H263() but with MPEG quant/dequant and    */
/*             thresholds derived from the quantization matrix. The IDCT    */
/*             is always run in full 8x8 mode (mismatch handling).          */
/*             Modified from FastCodeMB().                                  */
/*  Input    :                                                              */
/*      video       Video encoder data structure.                           */
/*      function    Unused (kept for interface compatibility).              */
/*      QP          Packed value: bits 0..4 are the quantizer of this MB,   */
/*                  the remaining upper bits (QP >> 5) are the luma byte    */
/*                  offset of the MB from the frame origin.                 */
/*      ncoefblck   Out: per block, the number of coefficients the VLC      */
/*                  stage has to scan (0, 1, 6, 26 or 64).                  */
/*  Output   :                                                              */
/*      video->outputMB               Quantized DCT coefficients.           */
/*      currVop->yChan,uChan,vChan    Reconstructed pixels.                 */
/*      video->headerInfo.CBP[mbnum]  Coded block pattern, one bit per      */
/*                                    block, block 0 in the MSB.            */
/*  Return   : PV_SUCCESS (always).                                         */
/*  Modified :                                                              */
/*      2/26/01 - modified threshold based on correlation coeff 0.75,       */
/*                only for mode H.263                                       */
/*              - ncoefblck[] as input, keep position of last non-zero      */
/*                coeff                                                     */
/*      8/10/01 - modified threshold based on correlation coeff 0.5         */
/*              - used column threshold to speed up column DCT              */
/*              - used bitmap zigzag to speed up RunLevel()                 */
/* ======================================================================== */
PV_STATUS CodeMB_MPEG(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
{
    Int sad, k, CBP, mbnum = video->mbnum;
    Short *output, *dataBlock;
    UChar Mode = video->headerInfo.Mode[mbnum];
    UChar *bitmapcol, *bitmaprow = video->bitmaprow;
    UInt  *bitmapzz;
    Int dc_scaler = 8;
    Vol *currVol = video->vol[video->currLayer];
    Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
    Int *qmat;
    Int dctMode, DctTh1, DctTh2, DctTh3, DctTh4;
    Int ColTh;

    Int(*BlockQuantDequantMPEG)(Short *, Short *, Int, Int *,
                                UChar [], UChar *, UInt *, Int, Int, Int);
    Int(*BlockQuantDequantMPEGDC)(Short *, Short *, Int, Int *,
                                  UChar [], UChar *, UInt *, Int);

    void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);

    /* motion comp. related var. */
    Vop *currVop = video->currVop;
    VideoEncFrameIO *inputFrame = video->input;
    Int ind_x = video->outputMB->mb_x;
    Int ind_y = video->outputMB->mb_y;
    Int lx = currVop->pitch;
    Int width = currVop->width;
    UChar *rec, *input, *pred;
    Int offset = QP >> 5;  /* QP is combined offset and QP */
    Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
    /*****************************/

    OSCL_UNUSED_ARG(function);

    output = video->outputMB->block[0];
    CBP = 0;
    QP = QP & 0x1F;     /* drop the offset bits, keep the quantizer */
    /* NOTE: the coefficient buffer is not zeroed here; the
     * M4VENC_MEMSET(output, 0, (sizeof(Short) << 6) * 6) that used to do it
     * was commented out on 7/24/01. */

    /* Select DCT/quantizer routines, quant matrix and SAD thresholds. */
    if (intra)
    {
        BlockDCT1x1 = &Block1x1DCTIntra;
        BlockDCT2x2 = &Block2x2DCT_AANIntra;
        BlockDCT4x4 = &Block4x4DCT_AANIntra;
        BlockDCT8x8 = &BlockDCT_AANIntra;

        BlockQuantDequantMPEG = &BlockQuantDequantMPEGIntra;
        BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCIntra;
        dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
        qmat = currVol->iqmat;
        DctTh1 = (Int)(3 * dc_scaler);                          /* was 2*dc_scaler */
        DctTh2 = (Int)((1.25 * QP - 1) * qmat[1] * 0.45);       /* was 0.567 */
        DctTh3 = (Int)((1.25 * QP - 1) * qmat[2] * 0.55);       /* was 1.162, 8/2/2001 */
        DctTh4 = (Int)((1.25 * QP - 1) * qmat[32] * 0.8);       /* was 1.7583, 0.7942 */
        ColTh = ColThIntra[QP];
    }
    else
    {
        BlockDCT1x1 = &Block1x1DCTwSub;
        BlockDCT2x2 = &Block2x2DCT_AANwSub;
        BlockDCT4x4 = &Block4x4DCT_AANwSub;
        BlockDCT8x8 = &BlockDCT_AANwSub;

        BlockQuantDequantMPEG = &BlockQuantDequantMPEGInter;
        BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCInter;
        qmat = currVol->niqmat;
        DctTh1 = (Int)(((QP << 1) - 0.5) * qmat[0] * 0.4);      /* was 0.2286, 0.3062 */
        DctTh2 = (Int)(((QP << 1) - 0.5) * qmat[1] * 0.45);     /* was 0.567, 0.4 */
        DctTh3 = (Int)(((QP << 1) - 0.5) * qmat[2] * 0.55);     /* was 1.162, 8/2/2001 */
        DctTh4 = (Int)(((QP << 1) - 0.5) * qmat[32] * 0.8);     /* was 1.7583, 0.7942 */
        ColTh = ColThInter[QP];
    }

    rec = currVop->yChan + offset;
    input = inputFrame->yChan + offset;
    if (lx != width) input -= (ind_y << 9);  /* non-padded offset */

    dataBlock = video->dataBlock;
    pred = video->predictedMB;

    for (k = 0; k < 6; k++)
    {
        CBP <<= 1;

        bitmapcol = video->bitmapcol[k];
        bitmapzz = video->bitmapzz[k];  /*  8/2/01 */
        if (k < 4)
        {
            /* Y block: the SAD comes from the motion data of this MB. */
            sad = video->mot[mbnum][k+1].sad;
            if (k & 1)
            {
                /* right-hand block of the current block row */
                rec += 8;
                input += 8;
            }
            else if (k == 2)
            {
                /* lower-left block: down 8 rows, back 8 columns */
                input += (width << 3) - 8;
                rec += (lx << 3) - 8;
            }
        }
        else
        {
            /* U, V block */
            if (k == 4)
            {
                rec = currVop->uChan + offsetc;
                input = inputFrame->uChan + offsetc;
                if (lx != width) input -= (ind_y << 7);
                /* From here on lx and width are the chroma strides. */
                lx >>= 1;
                width >>= 1;
                if (intra)
                {
                    dc_scaler = cal_dc_scalerENC(QP, 2); /* chrominance blocks */
                    DctTh1 = dc_scaler * 3;
                    sad = getBlockSum(input, width);
                }
                else
                    sad = Sad8x8(input, pred, width);
            }
            else
            {
                rec = currVop->vChan + offsetc;
                input = inputFrame->vChan + offsetc;
                if (lx != width) input -= (ind_y << 7);
                if (intra)
                    sad = getBlockSum(input, width);
                else
                    sad = Sad8x8(input, pred, width);
            }
        }

        if (sad < DctTh1) /* all-zero */
        {
            dctMode = 0;
            /* the CBP bit of this block stays 0 */
            ncoefblck[k] = 0;
        }
        else if (sad < DctTh2) /* DC-only */
        {
            dctMode = 1;
            BlockDCT1x1(dataBlock, input, pred, width);

            CBP |= (*BlockQuantDequantMPEGDC)(dataBlock, output, QP, qmat,
                                              bitmapcol, bitmaprow + k, bitmapzz, dc_scaler);
            ncoefblck[k] = 1;
        }
        else
        {
            /* Column threshold is handed over in the slot just past the
             * 64 coefficients. */
            dataBlock[64] = ColTh;

            if (sad < DctTh3) /* 2x2-DCT */
            {
                dctMode = 2;
                BlockDCT2x2(dataBlock, input, pred, width);
                ncoefblck[k] = 6;
            }
            else if (sad < DctTh4) /* 4x4 DCT */
            {
                dctMode = 4;
                BlockDCT4x4(dataBlock, input, pred, width);
                ncoefblck[k] = 26;
            }
            else /* full-DCT */
            {
                dctMode = 8;
                BlockDCT8x8(dataBlock, input, pred, width);
                ncoefblck[k] = 64;
            }

            CBP |= (*BlockQuantDequantMPEG)(dataBlock, output, QP, qmat,
                                            bitmapcol, bitmaprow + k, bitmapzz,
                                            dctMode, k, dc_scaler);
        }

        dctMode = 8; /* for mismatch handle: always reconstruct with the full IDCT */
        /* Reconstruct the block; the last argument packs twice the
         * reconstruction stride with the intra flag in bit 0. */
        BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | (intra));

        output += 64;
        /* Step through the prediction buffer: +8 to the right-hand block,
         * +120 (8 rows of 16 bytes, minus 8) to the next block row. */
        if (!(k & 1))
        {
            pred += 8;
        }
        else
        {
            pred += 120;
        }
    }

    video->headerInfo.CBP[mbnum] = CBP; /*  5/18/2001 */
    return PV_SUCCESS;
}

#endif

/* ======================================================================== */
/*  Function : getBlockSAV( )                                               */
/*  Date     : 8/10/2000                                                    */
/*  Purpose  : Sum of absolute values (SAV) of one 8x8 block.               */
/*  In/out   : block[64] holds the block data; it is only read.             */
/*  Return   : Sum of |block[i]| for i = 0..63.                             */
/* ======================================================================== */
Int getBlockSAV(Short block[])
{
    Int i, val, sav = 0;

    for (i = 0; i < 64; i++)
    {
        val = block[i];
        sav += (val > 0) ? val : -val;
    }

    return sav;
}

/* ======================================================================== */
/*  Function : Sad8x8( )                                                    */
/*  Date     : 8/10/2000                                                    */
/*  Purpose  : Sum of absolute differences between an 8x8 block of the      */
/*             current frame and an 8x8 block of the prediction.            */
/*  In/out   : cur    top-left pixel of the current block; consecutive      */
/*                    rows are `width` bytes apart.                         */
/*             prev   top-left pixel of the predicted block; consecutive    */
/*                    rows are 16 bytes apart.                              */
/*             width  row stride of `cur` in bytes.                         */
/*  Return   : SAD over the 64 pixel pairs (0 .. 64*255).                   */
/*  Note     : Implemented bytewise so that neither pointer needs any       */
/*             particular alignment and no pixel is accessed through an     */
/*             incompatible pointer type; the result equals the former      */
/*             4-pixels-per-word version for every width that is a          */
/*             multiple of 4.                                               */
/* ======================================================================== */
Int Sad8x8(UChar *cur, UChar *prev, Int width)
{
    Int sad = 0;
    Int row, col, diff;

    for (row = 0; row < 8; row++)
    {
        for (col = 0; col < 8; col++)
        {
            diff = (Int)prev[col] - (Int)cur[col];
            sad += (diff < 0) ? -diff : diff;
        }
        cur += width;
        prev += 16;
    }

    return sad;
}

/* ======================================================================== */
/*  Function : getBlockSum( )                                               */
/*  Date     : 8/10/2000                                                    */
/*  Purpose  : Sum of all pixel values within an 8x8 block.                 */
/*  In/out   : cur    top-left pixel of the block; consecutive rows are     */
/*                    `width` bytes apart.                                  */
/*             width  row stride of `cur` in bytes.                         */
/*  Return   : Sum of the 64 pixels (0 .. 64*255).                          */
/*  Note     : Implemented bytewise so that `cur` needs no particular       */
/*             alignment and no pixel is accessed through an incompatible   */
/*             pointer type; the result equals the former                   */
/*             4-pixels-per-word version for every width that is a          */
/*             multiple of 4.                                               */
/* ======================================================================== */
Int getBlockSum(UChar *cur, Int width)
{
    Int sum = 0;
    Int row, col;

    for (row = 0; row < 8; row++)
    {
        for (col = 0; col < 8; col++)
        {
            sum += cur[col];
        }
        cur += width;
    }

    return sum;
}