双线性插值4倍上采样的实现

/**
 * Upsample a 2-D sample plane by a factor of 4 in both directions using
 * bilinear interpolation with fixed-point weights.
 *
 * Output layout (destination is srcWidth*4 by srcHeight*4 samples):
 *   - The first two and the last two destination rows are filled by repeating
 *     each sample of the first / last source row four times horizontally
 *     (no interpolation).
 *   - In every other row, the two leftmost and two rightmost columns replicate
 *     the nearest sample of the first / last source column (no interpolation).
 *   - All remaining samples are a weighted sum of the surrounding 2x2 source
 *     neighbourhood. Per axis the weights are 7/8, 5/8, 3/8, 1/8 for the nearer
 *     sample going across the four sub-positions; the 2-D weight is the product
 *     (sum 64), rounded with "+32 >> 6".
 *
 * @tparam T         Integer sample type. Intermediate sums are computed in
 *                   int32_t, so sample magnitudes up to 16 bits are safe.
 * @param pDst       Destination plane; must hold srcHeight*4 rows of srcWidth*4
 *                   samples, rows spaced dstStride elements apart.
 * @param dstStride  Distance between destination rows, in elements of T.
 * @param pSrc       Source plane (only read), srcHeight rows of srcWidth samples.
 * @param srcStride  Distance between source rows, in elements of T.
 * @param srcWidth   Source width in samples.
 * @param srcHeight  Source height in samples.
 *
 * Does nothing if either pointer is null or either dimension is not positive.
 */
template<typename T>
void pbgBilinearX4(T *pDst, intptr_t dstStride, T *pSrc, intptr_t srcStride,
                                        int32_t srcWidth, int32_t srcHeight)
{
    // Nothing to produce for an empty plane; without this guard the border
    // loops below would still touch rows that do not exist.
    if (!pDst || !pSrc || srcWidth <= 0 || srcHeight <= 0)
    {
        return;
    }

    const int32_t dstWidth = srcWidth * 4;

    // Row offsets are kept in intptr_t (and built by multiplication) so large
    // or negative strides are handled without truncation or shifting a
    // negative value.
    const intptr_t stride1 = dstStride;
    const intptr_t stride2 = dstStride * 2;
    const intptr_t stride3 = dstStride * 3;
    const intptr_t stride4 = dstStride * 4;

    // Weight (in eighths) of the nearer top/left sample for sub-positions
    // 0..3; the far sample gets 8 minus this value.
    const int32_t kNearWeight[4] = {7, 5, 3, 1};

    T *pDstRow = pDst;
    T *pSrcRow = pSrc;

    // Top two rows: horizontal sample replication of the first source row.
    for (int32_t h = 0; h < 2; h++)
    {
        for (int32_t w = 0; w < dstWidth; w++)
        {
            pDstRow[w] = pSrcRow[w >> 2];
        }
        pDstRow += dstStride;
    }

    // Each iteration produces the four destination rows that lie between
    // source row y and source row y + 1.
    for (int32_t y = 0; y + 1 < srcHeight; y++)
    {
        T *pDstCol = pDstRow;
        T *pSrcCol = pSrcRow;

        // Left two columns: upper two rows take the upper source sample,
        // lower two rows take the lower one.
        pDstCol[0]       = pDstCol[1]           = pDstCol[stride1] = pDstCol[stride1 + 1] = pSrcCol[0];
        pDstCol[stride2] = pDstCol[stride2 + 1] = pDstCol[stride3] = pDstCol[stride3 + 1] = pSrcCol[srcStride];
        pDstCol += 2;

        // Interior: one 4x4 destination tile per horizontal source pair.
        for (int32_t x = 0; x + 1 < srcWidth; x++)
        {
            // Widen before multiplying so samples wider than 8 bits are not
            // truncated.
            const int32_t topLeft     = static_cast<int32_t>(pSrcCol[0]);
            const int32_t topRight    = static_cast<int32_t>(pSrcCol[1]);
            const int32_t bottomLeft  = static_cast<int32_t>(pSrcCol[srcStride]);
            const int32_t bottomRight = static_cast<int32_t>(pSrcCol[srcStride + 1]);

            for (int32_t r = 0; r < 4; r++)
            {
                const int32_t wTop    = kNearWeight[r];
                const int32_t wBottom = 8 - wTop;
                T *pOut = pDstCol + r * dstStride;

                for (int32_t c = 0; c < 4; c++)
                {
                    const int32_t wLeft  = kNearWeight[c];
                    const int32_t wRight = 8 - wLeft;

                    // Weights sum to 64; +32 rounds to nearest before >> 6.
                    const int32_t sum = wTop    * (wLeft * topLeft    + wRight * topRight)
                                      + wBottom * (wLeft * bottomLeft + wRight * bottomRight)
                                      + 32;
                    pOut[c] = static_cast<T>(sum >> 6);
                }
            }

            pSrcCol++;
            pDstCol += 4;
        }

        // Right two columns: pSrcCol now points at the last source column.
        pDstCol[0]       = pDstCol[1]           = pDstCol[stride1] = pDstCol[stride1 + 1] = pSrcCol[0];
        pDstCol[stride2] = pDstCol[stride2 + 1] = pDstCol[stride3] = pDstCol[stride3 + 1] = pSrcCol[srcStride];

        pSrcRow += srcStride;
        pDstRow += stride4;
    }

    // Bottom two rows: pSrcRow now points at the last source row.
    for (int32_t h = 0; h < 2; h++)
    {
        for (int32_t w = 0; w < dstWidth; w++)
        {
            pDstRow[w] = pSrcRow[w >> 2];
        }
        pDstRow += dstStride;
    }
}

猜你喜欢

转载自blog.csdn.net/myzhouwang/article/details/82497154
今日推荐