先推荐几篇帧内预测的文章:
https://blog.csdn.net/shaqoneal/article/details/44856469
https://blog.csdn.net/nb_vol_1/article/details/51144828
https://blog.csdn.net/cpp12341234/article/details/46043615
https://blog.csdn.net/m0_37579288/article/details/79153952
以及HEVC_CJL大神的帧内预测系列:https://blog.csdn.net/HEVC_CJL/article/details/8175721
下面写一下我对HM16.9帧内预测部分代码的理解。
编码的入口函数是encmain.cpp文件中的main函数,调用encode函数进行编码
cTAppEncTop.encode();
TAppEncTop::encode函数的处理流程是读入m_iGOPSize大小的帧统一处理,调用下面函数
m_cTEncTop.encode( bEos, flush ? 0 : pcPicYuvOrg, flush ? 0 : &cPicYuvTrueOrg, snrCSC, m_cListPicYuvRec, outputAccessUnits, iNumEncoded );//帧编码
TEncTop::encode函数调用下面函数处理GOP:
// compress GOP
m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, accessUnitsOut, false, false, snrCSC, m_printFrameMSE);
TEncGOP::compressGOP函数的处理流程是遍历GOP中的每一帧,处理每一帧的Slice:
m_pcSliceEncoder->compressSlice ( pcPic, false, false );
TEncSlice::compressSlice函数是对Slice中的每一个CTU(64x64)进行处理:
// Excerpt: visits every CTU address in [startCtuTsAddr, boundingCtuTsAddr).
// The derivation of ctuRsAddr from ctuTsAddr is elided ("...") in this excerpt.
for( UInt ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ++ctuTsAddr )
{
...
// initialize CTU encoder
TComDataCU* pCtu = pcPic->getCtu( ctuRsAddr );
pCtu->initCtu( pcPic, ctuRsAddr );
...
// run CTU trial encoder
m_pcCuEncoder->compressCtu( pCtu );
...
// add this CTU's bits, RD cost and distortion to the running picture-level totals
m_uiPicTotalBits += pCtu->getTotalBits();
m_dPicRdCost += pCtu->getTotalCost();
m_uiPicDist += pCtu->getTotalDistortion();
}
TEncCu::compressCtu函数就是调用xCompressCU函数处理CU,其中最优的CU划分存储在m_ppcBestCU[0]变量中:
xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 DEBUG_STRING_PASS_INTO(sDebug) );
对于帧内预测,TEncCu::xCompressCU函数的处理流程是:先判断当前CU是否超出图像边界,如果没有超出,则进行帧内预测的处理;然后判断当前CU是否可以继续划分,若可以,则划分成4个子CU,递归调用xCompressCU函数进行处理:
// Excerpt of the recursive CU compression routine (parts are elided with "...").
// Visible flow: compute the CU's pixel extents, test whether it lies inside the
// picture, try the intra modes through xCheckRDCostIntra when it does, then
// recurse into four sub-CUs when further splitting is allowed.
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, const UInt uiDepth DEBUG_STRING_FN_DECLARE(sDebug_), PartSize eParentPartSize )
{
// (variable definitions and initialization omitted)
...
// Left / right / top / bottom pixel coordinates of the current CU, used for the picture-boundary check
const UInt uiLPelX = rpcBestCU->getCUPelX();
const UInt uiRPelX = uiLPelX + rpcBestCU->getWidth(0) - 1;// m_puhWidth[uiIdx]
const UInt uiTPelY = rpcBestCU->getCUPelY();
const UInt uiBPelY = uiTPelY + rpcBestCU->getHeight(0) - 1;
const UInt uiWidth = rpcBestCU->getWidth(0);
// (various control checks omitted)
...
// bBoundary is true when the CU's right or bottom edge falls outside the picture's luma dimensions
const Bool bBoundary = !( uiRPelX < sps.getPicWidthInLumaSamples() && uiBPelY < sps.getPicHeightInLumaSamples() );
if ( !bBoundary )
{
...
// do inter modes, SKIP and 2Nx2N (inter-mode code omitted)
if( rpcBestCU->getSlice()->getSliceType() != I_SLICE )
{
...
}
...
// do normal intra modes
// speedup for inter frames
if((rpcBestCU->getSlice()->getSliceType() == I_SLICE) ||
((!m_pcEncCfg->getDisableIntraPUsInInterSlices()) && (
(rpcBestCU->getCbf( 0, COMPONENT_Y ) != 0) ||
((rpcBestCU->getCbf( 0, COMPONENT_Cb ) != 0) && (numberValidComponents > COMPONENT_Cb)) ||
((rpcBestCU->getCbf( 0, COMPONENT_Cr ) != 0) && (numberValidComponents > COMPONENT_Cr)) // avoid very complex intra if it is unlikely
)))// decides whether the intra modes are tested at all
{
xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_2Nx2N DEBUG_STRING_PASS_INTO(sDebug) );// intra entry point, 2Nx2N PU partition
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if( uiDepth == sps.getLog2DiffMaxMinCodingBlockSize())// CU depth equals the log2 difference between max and min coding block size (article note: normally 3)
{
if( rpcTempCU->getWidth(0) > ( 1 << sps.getQuadtreeTULog2MinSize() ) )// CU width is larger than the minimum TU size (article note: normally 4)
{
xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_NxN DEBUG_STRING_PASS_INTO(sDebug) );// also test the NxN PU partition
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
}
}
}
...
}
...
// bSubBranch: true at a picture boundary; otherwise false only when early-CU is enabled
// and the best CU has a valid cost and is skipped
const Bool bSubBranch = bBoundary || !( m_pcEncCfg->getUseEarlyCU() && rpcBestCU->getTotalCost()!=MAX_DOUBLE && rpcBestCU->isSkipped(0) );
// If the current CU may be split further
if( bSubBranch && uiDepth < sps.getLog2DiffMaxMinCodingBlockSize() && (!getFastDeltaQp() || uiWidth > fastDeltaQPCuMaxSize || bBoundary))
{
...
// Split into 4 sub-CUs and recurse into xCompressCU for each of them
for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++ )
{
// Pass the parent's partition size down only when the best CU has a valid cost and is inter-coded
if ( !(rpcBestCU->getTotalCost()!=MAX_DOUBLE && rpcBestCU->isInter(0)) )
{
xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth DEBUG_STRING_PASS_INTO(sChild), NUMBER_OF_PART_SIZES );
}
else
{
xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth DEBUG_STRING_PASS_INTO(sChild), rpcBestCU->getPartitionSize(0) );
}
DEBUG_STRING_APPEND(sTempDebug, sChild)
#else
xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth );
...
}
...
}
...
}
TEncCu::xCheckRDCostIntra函数是帧内预测的入口函数,它调用estIntraPredLumaQT函数进行亮度分量的预测、变换、量化和编码;调用estIntraPredChromaQT函数进行色度分量的处理;最后调用xCheckBestMode函数check最优的模式:
// Tests intra coding of rpcTempCU with PU partition eSize.
// Visible flow: mark the CU as non-skipped intra with the given partition size,
// run the luma intra search (and the chroma search unless the chroma format is
// CHROMA_400), code the CU syntax elements to count bits, compute the RD cost,
// and finally hand both candidates to xCheckBestMode.
//   rpcBestCU - best candidate so far; passed on to xCheckBestMode
//   rpcTempCU - candidate that is filled in and evaluated here
//   eSize     - PU partition size to test
Void TEncCu::xCheckRDCostIntra( TComDataCU *&rpcBestCU,
TComDataCU *&rpcTempCU,
PartSize eSize
DEBUG_STRING_FN_DECLARE(sDebug) )
{
DEBUG_STRING_NEW(sTest)
if(getFastDeltaQp())// article note: false by default
{
const TComSPS &sps=*(rpcTempCU->getSlice()->getSPS());
const UInt fastDeltaQPCuMaxSize = Clip3(sps.getMaxCUHeight()>>(sps.getLog2DiffMaxMinCodingBlockSize()), sps.getMaxCUHeight(), 32u);
if(rpcTempCU->getWidth( 0 ) > fastDeltaQPCuMaxSize)
{
return; // only check necessary 2Nx2N Intra in fast deltaqp mode
}
}
// Set the CU parameters: not skipped, partition size eSize, intra prediction mode
UInt uiDepth = rpcTempCU->getDepth( 0 );
rpcTempCU->setSkipFlagSubParts( false, 0, uiDepth );
rpcTempCU->setPartSizeSubParts( eSize, 0, uiDepth );
rpcTempCU->setPredModeSubParts( MODE_INTRA, 0, uiDepth );
rpcTempCU->setChromaQpAdjSubParts( rpcTempCU->getCUTransquantBypass(0) ? 0 : m_cuChromaQpOffsetIdxPlus1, 0, uiDepth );
// Luma residual buffer; the same buffer is passed to both the luma and the chroma search below
Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
// Luma: prediction, transform, quantization and coding (per the article)
m_pcPredSearch->estIntraPredLumaQT( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], resiLuma DEBUG_STRING_PASS_INTO(sTest) );
// Copy the reconstructed luma into the picture's reconstruction buffer
m_ppcRecoYuvTemp[uiDepth]->copyToPicComponent(COMPONENT_Y, rpcTempCU->getPic()->getPicYuvRec(), rpcTempCU->getCtuRsAddr(), rpcTempCU->getZorderIdxInCtu() );
if (rpcBestCU->getPic()->getChromaFormat()!=CHROMA_400)// only when chroma components are present
{
// Chroma processing
m_pcPredSearch->estIntraPredChromaQT( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], resiLuma DEBUG_STRING_PASS_INTO(sTest) );
}
m_pcEntropyCoder->resetBits();
if ( rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
{
m_pcEntropyCoder->encodeCUTransquantBypassFlag( rpcTempCU, 0, true );
}
// Code the CU-level syntax elements so that their bits are counted
m_pcEntropyCoder->encodeSkipFlag ( rpcTempCU, 0, true );
m_pcEntropyCoder->encodePredMode( rpcTempCU, 0, true );
m_pcEntropyCoder->encodePartSize( rpcTempCU, 0, uiDepth, true );
m_pcEntropyCoder->encodePredInfo( rpcTempCU, 0 );
m_pcEntropyCoder->encodeIPCMInfo(rpcTempCU, 0, true );
// Encode Coefficients
Bool bCodeDQP = getdQPFlag();// article note: m_bEncodeDQP==false
Bool codeChromaQpAdjFlag = getCodeChromaQpAdjFlag();// article note: m_stillToCodeChromaQpOffsetFlag==false
m_pcEntropyCoder->encodeCoeff( rpcTempCU, 0, uiDepth, bCodeDQP, codeChromaQpAdjFlag );
// Write the two flags back after encodeCoeff, which receives them as arguments
setCodeChromaQpAdjFlag( codeChromaQpAdjFlag );
setdQPFlag( bCodeDQP );
m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
// Collect statistics: written bits, coded bins and the resulting RD cost
rpcTempCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
rpcTempCU->getTotalBins() = ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
xCheckDQP( rpcTempCU );
// Check against the best mode found so far
xCheckBestMode(rpcBestCU, rpcTempCU, uiDepth DEBUG_STRING_PASS_INTO(sDebug) DEBUG_STRING_PASS_INTO(sTest));
}
estIntraPredLumaQT函数负责亮度分量的预测、变换、量化和编码,处理流程是:循环处理每一个PU块;对于每一个PU块,先遍历35个模式,选出numModesForFullRD个最优模式存入uiRdModeList,再把MPM中尚未包含在uiRdModeList里的模式也加入其中;然后遍历uiRdModeList,对每一个模式调用xRecurIntraCodingLumaQT函数进行TU的预测、变换、量化和编码(这时除非是64x64的块,否则PU不划分),选出Cost最小的模式;最后对选出的最优模式再次调用xRecurIntraCodingLumaQT函数(这时对PU进行划分)进行预测、变换、量化和编码,并再次选择最小的Cost对应的模式。
// Excerpt of the luma intra mode search (parts are elided with "...").
// Visible flow, per PU: rank all 35 modes by a distortion-plus-mode-bits cost,
// keep the numModesForFullRD best in uiRdModeList, optionally append missing
// most-probable modes, evaluate each listed mode with xRecurIntraCodingLumaQT,
// then run xRecurIntraCodingLumaQT once more and store the best mode on the CU.
Void
TEncSearch::estIntraPredLumaQT(TComDataCU* pcCU,
TComYuv* pcOrgYuv,
TComYuv* pcPredYuv,
TComYuv* pcResiYuv,
TComYuv* pcRecoYuv,
Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]
DEBUG_STRING_FN_DECLARE(sDebug))
{
// (variable definitions omitted)
...
//===== loop over partitions =====
TComTURecurse tuRecurseCU(pcCU, 0);
TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT);// PU partitioning: no split when uiInitTrDepth==0, quad split otherwise
do// loop over every PU
{
const UInt uiPartOffset=tuRecurseWithPU.GetAbsPartIdxTU();
//===== init pattern for luma prediction =====
DEBUG_STRING_NEW(sTemp2)
//===== determine set of modes to be tested (using prediction signal only) =====
Int numModesAvailable = 35; //total number of Intra modes
UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
Int numModesForFullRD = m_pcEncCfg->getFastUDIUseMPMEnabled()?g_aucIntraModeNumFast_UseMPM[ uiWidthBit ] : g_aucIntraModeNumFast_NotUseMPM[ uiWidthBit ];// number of full-RD candidate modes for this PU size
// this should always be true
assert (tuRecurseWithPU.ProcessComponentSection(COMPONENT_Y));
initIntraPatternChType( tuRecurseWithPU, COMPONENT_Y, true DEBUG_STRING_PASS_INTO(sTemp2) );// article note: fills the reference samples and smooths them
Bool doFastSearch = (numModesForFullRD != numModesAvailable);// article note: true
if (doFastSearch)
{
assert(numModesForFullRD < numModesAvailable);
for( Int i=0; i < numModesForFullRD; i++ )
{
CandCostList[ i ] = MAX_DOUBLE;
}
CandNum = 0;
const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU();
Pel* piOrg = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx );
Pel* piPred = pcPredYuv->getAddr( COMPONENT_Y, uiAbsPartIdx );
UInt uiStride = pcPredYuv->getStride( COMPONENT_Y );
DistParam distParam;
// Hadamard is requested unless the CU uses transquant bypass
const Bool bUseHadamard=pcCU->getCUTransquantBypass(0) == 0;
m_pcRdCost->setDistParam(distParam, sps.getBitDepth(CHANNEL_TYPE_LUMA), piOrg, uiStride, piPred, uiStride, puRect.width, puRect.height, bUseHadamard);
distParam.bApplyWeight = false;
// Loop over all 35 modes
for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
{
UInt uiMode = modeIdx;
Distortion uiSad = 0;
// Decide whether reference-sample smoothing is applied for this mode
const Bool bUseFilter=TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, uiMode, puRect.width, puRect.height, chFmt, sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag());
// Run the prediction for this mode
predIntraAng( COMPONENT_Y, uiMode, piOrg, uiStride, piPred, uiStride, tuRecurseWithPU, bUseFilter, TComPrediction::UseDPCMForFirstPassIntraEstimation(tuRecurseWithPU, uiMode) );
// use hadamard transform here
uiSad+=distParam.DistFunc(&distParam);
UInt iModeBits = 0;
// NB xModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
iModeBits+=xModeBitsIntra( pcCU, uiMode, uiPartOffset, uiDepth, CHANNEL_TYPE_LUMA );
// First-pass cost: distortion plus mode bits weighted by sqrtLambdaForFirstPass
Double cost = (Double)uiSad + (Double)iModeBits * sqrtLambdaForFirstPass;
...
// Update the candidate list, keeping the numModesForFullRD best modes
CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
}
if (m_pcEncCfg->getFastUDIUseMPMEnabled())
{
Int uiPreds[NUM_MOST_PROBABLE_MODES] = {-1, -1, -1};
Int iMode = -1;
pcCU->getIntraDirPredictor( uiPartOffset, uiPreds, COMPONENT_Y, &iMode );// fetch the MPMs
const Int numCand = ( iMode >= 0 ) ? iMode : Int(NUM_MOST_PROBABLE_MODES);
// Append every MPM that is not already present in uiRdModeList
for( Int j=0; j < numCand; j++)
{
Bool mostProbableModeIncluded = false;
Int mostProbableMode = uiPreds[j];
for( Int i=0; i < numModesForFullRD; i++)
{
mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
}
if (!mostProbableModeIncluded)
{
uiRdModeList[numModesForFullRD++] = mostProbableMode;
}
}
}
}
else
{
// No fast search: the list is simply modes 0 .. numModesForFullRD-1
for( Int i=0; i < numModesForFullRD; i++)
{
uiRdModeList[i] = i;
}
}
...
// Loop over uiRdModeList
for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ )
#endif
{
// set luma prediction mode
UInt uiOrgMode = uiRdModeList[uiMode];
pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
DEBUG_STRING_NEW(sMode)
// set context models
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
// determine residual for partition
Distortion uiPUDistY = 0;
Double dPUCost = 0.0;
#if HHI_RQT_INTRA_SPEEDUP
// TU prediction, transform, quantization and coding (bCheckFirst = true)
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, true, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
#else
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
#endif
...
// check r-d cost: keep the smallest PU cost
if( dPUCost < dBestPUCost )
{
...
}
...
} // Mode loop
...
// Process the selected best mode once more (bCheckFirst = false)
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, false, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sModeTree));
// check r-d cost: again keep the smallest cost
if( dPUCost < dBestPUCost )
{
...
}
} // Mode loop
#endif
DEBUG_STRING_APPEND(sDebug, sPU)
//--- update overall distortion ---
...
//--- update transform index and cbf ---
...
//=== update PU data ====
pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiBestPUMode, uiPartOffset, uiDepth + uiInitTrDepth );
} while (tuRecurseWithPU.nextSection(tuRecurseCU));
...
//===== reset context models =====
m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
//===== set distortion (rate and r-d costs are determined later) =====
pcCU->getTotalDistortion() = uiOverallDistY;
}
TComPrediction::filteringIntraReferenceSamples函数是判断当前PU块的参考像素是否进行平滑,它的判断标准参考我的另一篇博客中参考像素平滑的内容 https://blog.csdn.net/shayashi/article/details/82877875 ,对应代码如下:
// Decides whether the intra reference samples are smoothed for the given
// component, prediction mode and block size.
// Returns false when filterIntraReferenceSamples() rejects filtering for this
// channel type / chroma format / disable flag, and false for the DC mode.
// Otherwise the smaller of the mode's distances to HOR_IDX and VER_IDX is
// compared with the m_aucIntraFilter entry selected by channel type and block
// width; filtering is used when the distance exceeds that entry.
Bool TComPrediction::filteringIntraReferenceSamples(const ComponentID compID, UInt uiDirMode, UInt uiTuChWidth, UInt uiTuChHeight, const ChromaFormat chFmt, const Bool intraReferenceSmoothingDisabled)
{
  // Filtering ruled out up front for this channel type / chroma format / flag.
  if (!filterIntraReferenceSamples(toChannelType(compID), chFmt, intraReferenceSmoothingDisabled))
  {
    return false;
  }

  assert(uiTuChWidth>=4 && uiTuChHeight>=4 && uiTuChWidth<128 && uiTuChHeight<128);

  // no smoothing for DC or LM chroma
  if (uiDirMode == DC_IDX)
  {
    return false;
  }

  // Distance of the mode to the nearer of the horizontal and vertical modes.
  const Int distToHor  = abs((Int) uiDirMode - HOR_IDX);
  const Int distToVer  = abs((Int) uiDirMode - VER_IDX);
  const Int nearestGap = min<Int>(distToHor, distToVer);

  // The block width selects the column of the threshold table.
  const UInt widthIdx = g_aucConvertToBit[uiTuChWidth];
  assert(widthIdx < MAX_INTRA_FILTER_DEPTHS);

  return nearestGap > m_aucIntraFilter[toChannelType(compID)][widthIdx];
}
predIntraAng是预测函数,该函数的流程是获取参考像素,然后根据当前模式选择对应函数进行具体预测:
// Produces the intra prediction for one block of component compID into piPred.
// Two paths are visible:
//  - bUseLosslessDPCM: the first column (HOR_IDX) or first row (any other mode,
//    labelled VER_IDX) comes from the unfiltered reference samples, and the rest
//    of the block is copied from piOrg when piOrg is non-null.
//  - otherwise: PLANAR_IDX goes to xPredIntraPlanar; every other mode goes to
//    xPredIntraAng, followed by xDCPredFiltering when the mode is DC_IDX.
// Note that piPred and piOrg are advanced in place inside the DPCM path.
Void TComPrediction::predIntraAng( const ComponentID compID, UInt uiDirMode, Pel* piOrg /* Will be null for decoding */, UInt uiOrgStride, Pel* piPred, UInt uiStride, TComTU &rTu, const Bool bUseFilteredPredSamples, const Bool bUseLosslessDPCM )
{
const ChannelType channelType = toChannelType(compID);
const TComRectangle &rect = rTu.getRect(isLuma(compID) ? COMPONENT_Y : COMPONENT_Cb);
const Int iWidth = rect.width;
const Int iHeight = rect.height;
assert( g_aucConvertToBit[ iWidth ] >= 0 ); // 4x 4
assert( g_aucConvertToBit[ iWidth ] <= 5 ); // 128x128
//assert( iWidth == iHeight );
// Keep the original destination pointer; piPred itself is advanced in the DPCM path
Pel *pDst = piPred;
// get starting pixel in block
// sw is used below as the row stride of the reference-sample buffer
const Int sw = (2 * iWidth + 1);
if ( bUseLosslessDPCM )//horizontal or vertical
{
const Pel *ptrSrc = getPredictorPtr( compID, false );
// Sample Adaptive intra-Prediction (SAP)
if (uiDirMode==HOR_IDX)
{
// left column filled with reference samples
// remaining columns filled with piOrg data (if available).
for(Int y=0; y<iHeight; y++)
{
piPred[y*uiStride+0] = ptrSrc[(y+1)*sw];
}
if (piOrg!=0)
{
piPred+=1; // miss off first column
for(Int y=0; y<iHeight; y++, piPred+=uiStride, piOrg+=uiOrgStride)
{
memcpy(piPred, piOrg, (iWidth-1)*sizeof(Pel));
}
}
}
else // VER_IDX
{
// top row filled with reference samples
// remaining rows filled with piOrg data (if available)
for(Int x=0; x<iWidth; x++)
{
piPred[x] = ptrSrc[x+1];
}
if (piOrg!=0)
{
piPred+=uiStride; // miss off the first row
for(Int y=1; y<iHeight; y++, piPred+=uiStride, piOrg+=uiOrgStride)
{
memcpy(piPred, piOrg, iWidth*sizeof(Pel));
}
}
}
}
else
{
// Fetch the reference-sample pointer (filtered or unfiltered, per bUseFilteredPredSamples)
const Pel *ptrSrc = getPredictorPtr( compID, bUseFilteredPredSamples );
// Planar mode is handled by xPredIntraPlanar
if ( uiDirMode == PLANAR_IDX )
{
xPredIntraPlanar( ptrSrc+sw+1, sw, pDst, uiStride, iWidth, iHeight );
}
else
{
// Create the prediction
TComDataCU *const pcCU = rTu.getCU();
const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
// Edge filters are disabled only when RDPCM is enabled and the CU uses transquant bypass
const Bool enableEdgeFilters = !(pcCU->isRDPCMEnabled(uiAbsPartIdx) && pcCU->getCUTransquantBypass(uiAbsPartIdx));
#if O0043_BEST_EFFORT_DECODING// article note: false
const Int channelsBitDepthForPrediction = rTu.getCU()->getSlice()->getSPS()->getStreamBitDepth(channelType);
#else
const Int channelsBitDepthForPrediction = rTu.getCU()->getSlice()->getSPS()->getBitDepth(channelType);// article note: 8 for luma
#endif
// DC and angular prediction
xPredIntraAng( channelsBitDepthForPrediction, ptrSrc+sw+1, sw, pDst, uiStride, iWidth, iHeight, channelType, uiDirMode, enableEdgeFilters );
// For the DC mode, additionally apply the DC edge filter
if( uiDirMode == DC_IDX )
{
xDCPredFiltering( ptrSrc+sw+1, sw, pDst, uiStride, iWidth, iHeight, channelType );
}
}
}
}
预测的相关函数:xPredIntraPlanar函数、xPredIntraAng函数和 xDCPredFiltering函数另开一篇博客写:https://blog.csdn.net/shayashi/article/details/83056071
xRecurIntraCodingLumaQT函数是TU的预测、变换、量化和编码函数。该函数先判断当前块是否可以整块处理、是否可以继续分割:若可以整块处理,则TU大小与PU相同;若可以分割,则分割成4块TU,递归调用xRecurIntraCodingLumaQT函数进行处理。(注意整块处理和分割并不矛盾,可以先进行整块处理,再进行分割,最后比较两者的Cost。)xRecurIntraCodingLumaQT主要调用xIntraCodingTUBlock函数进行变换、量化和编码,调用xStoreIntraResultQT函数存储结果。函数代码:
// Excerpt of the recursive luma TU coding routine (parts are elided with "...",
// so brace and preprocessor nesting in this excerpt is incomplete).
// Visible flow: decide bCheckFull / bCheckSplit, code the block as a single TU
// (with or without the transform-skip trial), code it as split child TUs by
// recursion, compare both costs, and add the chosen distortion and cost to
// ruiDistY and dRDCost.
Void
TEncSearch::xRecurIntraCodingLumaQT(TComYuv* pcOrgYuv,
TComYuv* pcPredYuv,
TComYuv* pcResiYuv,
Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],
Distortion& ruiDistY,
#if HHI_RQT_INTRA_SPEEDUP
Bool bCheckFirst,
#endif
Double& dRDCost,
TComTU& rTu
DEBUG_STRING_FN_DECLARE(sDebug))
{
// (variable definitions and preprocessing omitted)
...
#if HHI_RQT_INTRA_SPEEDUP// (article note: true)
Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();// (article note: 5)
Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);
// don't check split if TU size is less or equal to max TU size
Bool noSplitIntraMaxTuSize = bCheckFull;
if(m_pcEncCfg->getRDpenalty() && ! isIntraSlice)
// Further checks apply only for non-intra slices with a non-zero RD penalty setting
{
// in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice
noSplitIntraMaxTuSize = ( uiLog2TrSize <= min(maxTuSize,4) );
// if maximum RD-penalty don't check TU size 32x32
if(m_pcEncCfg->getRDpenalty()==2)
{
bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4));
}
}
if( bCheckFirst && noSplitIntraMaxTuSize )
{
bCheckSplit = false;
}
#else
Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);
// if maximum RD-penalty don't check TU size 32x32
if((m_pcEncCfg->getRDpenalty()==2) && !isIntraSlice)
{
bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4));
}
...
// Code the block as one whole TU
if( bCheckFull )
{
// Transform-skip trial: modeId 0 and modeId 1 are both tested
if(checkTransformSkip == true)
{
//----- store original entropy coding status -----
m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
Distortion singleDistTmpLuma = 0;
UInt singleCbfTmpLuma = 0;
Double singleCostTmp = 0;
Int firstCheckId = 0;
for(Int modeId = firstCheckId; modeId < 2; modeId ++)
{
DEBUG_STRING_NEW(sModeString)
Int default0Save1Load2 = 0;
singleDistTmpLuma=0;
if(modeId == firstCheckId)
{
default0Save1Load2 = 1;
}
else
{
default0Save1Load2 = 2;
}
pcCU->setTransformSkipSubParts ( modeId, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
// Performs the actual transform, quantization and coding; in this branch default0Save1Load2 is 1 (first pass) or 2 (second pass)
xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, singleDistTmpLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sModeString), default0Save1Load2 );
// Read back the luma cbf, then compute the cost
singleCbfTmpLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );
//----- determine rate and r-d cost -----
if(modeId == 1 && singleCbfTmpLuma == 0)
{
//In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
singleCostTmp = MAX_DOUBLE;
}
else
{
UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false );
singleCostTmp = m_pcRdCost->calcRdCost( uiSingleBits, singleDistTmpLuma );
}
// Keep the smallest cost seen so far
if(singleCostTmp < dSingleCost)
{
DEBUG_STRING_SWAP(sDebug, sModeString)
dSingleCost = singleCostTmp;
uiSingleDistLuma = singleDistTmpLuma;
uiSingleCbfLuma = singleCbfTmpLuma;
bestModeId[COMPONENT_Y] = modeId;
if(bestModeId[COMPONENT_Y] == firstCheckId)// first (save) pass: store the result directly
{
xStoreIntraResultQT(COMPONENT_Y, rTu );// store the result of the current mode
m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_TEMP_BEST ] );
}
// Cross-component prediction. Article note: a chroma component can be derived from another
// component's values (presumably luma - TODO confirm); see https://blog.csdn.net/lin453701006/article/details/52909854
if (pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag())
{
const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;
const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;
for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
{
if (bMaintainResidual[storedResidualIndex])
{
xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);
}
}
}
}
// After the first pass, restore the entropy coder state saved before the trial
if (modeId == firstCheckId)
{
m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
}
}
...
}
else// no transform-skip trial
{
//----- store original entropy coding status ----- (saved only when a split will be tested afterwards)
if( bCheckSplit )
{
m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
}
//----- code luma/chroma block with given intra prediction mode and store Cbf-----
dSingleCost = 0.0;
pcCU ->setTransformSkipSubParts ( 0, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
// Transform, quantization and coding
xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, uiSingleDistLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sDebug));
// (cost computation and the getCrossComponentPredictionEnabledFlag handling, similar to the above, omitted)
...
}
}
// Code the block as split child TUs
if( bCheckSplit )
{
//----- store full entropy coding status, load original entropy coding status ----- (save first)
if( bCheckFull )
{
m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_TEST ] );
m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
}
else
{
m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
}
//----- code splitted block -----
Double dSplitCost = 0.0;
Distortion uiSplitDistLuma = 0;
UInt uiSplitCbfLuma = 0;
TComTURecurse tuRecurseChild(rTu, false);
DEBUG_STRING_NEW(sSplit)
do// recurse into every child TU
{
DEBUG_STRING_NEW(sChild)
#if HHI_RQT_INTRA_SPEEDUP
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, bCheckFirst, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );
#else
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );
#endif
DEBUG_STRING_APPEND(sSplit, sChild)// article note: GetAbsPartIdxCU() + GetRelPartIdxTU()
// OR together the luma cbf of all child TUs
uiSplitCbfLuma |= pcCU->getCbf( tuRecurseChild.GetAbsPartIdxTU(), COMPONENT_Y, tuRecurseChild.GetTransformDepthRel() );// article note: mTrDepthRelCU[COMPONENT_Y]
} while (tuRecurseChild.nextSection(rTu) );
...
//----- restore context states -----
m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
//----- determine rate and r-d cost -----
UInt uiSplitBits = xGetIntraBitsQT( rTu, true, false, false );
dSplitCost = m_pcRdCost->calcRdCost( uiSplitBits, uiSplitDistLuma );
//===== compare and set best ===== (pick the smaller cost)
if( dSplitCost < dSingleCost )
{
//--- update cost ---
DEBUG_STRING_SWAP(sSplit, sDebug)
ruiDistY += uiSplitDistLuma;
dRDCost += dSplitCost;
// (getCrossComponentPredictionEnabledFlag handling omitted)
}
}
return;
}
// The code below is the path where the single (unsplit) TU result is kept
//----- set entropy coding status -----
m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_TEST ] );
//--- set transform index and Cbf values ---
pcCU->setTrIdxSubParts( uiTrDepth, uiAbsPartIdx, uiFullDepth );
const TComRectangle &tuRect=rTu.getRect(COMPONENT_Y);
pcCU->setCbfSubParts ( uiSingleCbfLuma << uiTrDepth, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
pcCU ->setTransformSkipSubParts ( bestModeId[COMPONENT_Y], COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
//--- set reconstruction for next intra prediction blocks ---
const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx;
const UInt uiWidth = tuRect.width;
const UInt uiHeight = tuRect.height;
Pel* piSrc = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( COMPONENT_Y, uiAbsPartIdx );
UInt uiSrcStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride ( COMPONENT_Y );
Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder );
UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride ( COMPONENT_Y );
// Copy the TU's samples row by row from the temporary QT buffer into the picture reconstruction buffer
for( UInt uiY = 0; uiY < uiHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
{
for( UInt uiX = 0; uiX < uiWidth; uiX++ )
{
piDes[ uiX ] = piSrc[ uiX ];
}
}
}
// Add the single-TU distortion and cost to the caller's running totals
ruiDistY += uiSingleDistLuma;
dRDCost += dSingleCost;
}