/**
 * @file patch_idct.cpp
 * @brief IDCT patch.
 *
 * $LicenseInfo:firstyear=2000&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
 */

#include "linden_common.h"

#include "llmath.h"
//#include "vmath.h"
#include "v3math.h"
#include "patch_dct.h"

// Group-of-patches header consulted by the IDCT and decompression routines in
// this file (they read its patch_size and stride fields). Installed through
// set_group_of_patch_header().
LLGroupHeader   *gGOPP;

// Records the group header to use for subsequent decompression calls.
// Only the pointer is stored; no copy of the header is made.
void set_group_of_patch_header(LLGroupHeader *gopp)
{
    gGOPP = gopp;
}

// Per-coefficient dequantization weights, stored row-major using the size
// last passed to build_patch_dequantize_table() as the row length.
F32 gPatchDequantizeTable[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE];

/**
 * Fills the first size*size entries of gPatchDequantizeTable.
 *
 * The weight for row j, column i is 1 + 2*(i + j).
 * No bounds check is performed: size*size must fit in the table.
 *
 * @param size  edge length of the (square) patch
 */
void build_patch_dequantize_table(S32 size)
{
    F32 *entry = gPatchDequantizeTable;

    for (S32 row = 0; row < size; ++row)
    {
        for (S32 col = 0; col < size; ++col)
        {
            // Rows are written back to back, so the running pointer always
            // addresses slot row*size + col.
            *entry++ = (1.f + 2.f*(col + row));
        }
    }
}

// Patch size the decompressor tables were most recently built for.
// init_patch_decompressor() compares against it and only rebuilds the tables
// when the requested size differs. Initially 0.
S32 gCurrentDeSize = 0;

// Cosine basis table for the inverse DCT, indexed as [u*size + n] where size
// is the value last passed to setup_patch_icosines().
F32 gPatchICosines[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE];

/**
 * Precomputes the inverse-DCT cosine table:
 *     gPatchICosines[u*size + n] = cos((2n + 1) * u * pi / (2*size))
 * for every frequency index u and sample index n in [0, size).
 *
 * No bounds check is performed: size*size must fit in the table.
 *
 * @param size  edge length of the (square) patch
 */
void setup_patch_icosines(S32 size)
{
    const F32 half_pi_over_size = F_PI*0.5f/size;
    F32 *slot = gPatchICosines;

    for (S32 freq = 0; freq < size; ++freq)
    {
        for (S32 sample = 0; sample < size; ++sample)
        {
            // Slots are filled sequentially, i.e. at freq*size + sample.
            *slot++ = cosf((2.f*sample+1.f)*freq*half_pi_over_size);
        }
    }
}

// For each row-major coefficient position (row*size + col), the position of
// that coefficient in the zigzag-ordered input consumed by decompress_patch().
S32 gDeCopyMatrix[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE];

/**
 * Builds gDeCopyMatrix for a size x size patch.
 *
 * Walks the grid in zigzag order starting from the top-left cell and stores,
 * in each visited cell, the number of cells visited before it. The walk
 * alternates between a single step along an edge and a run along a diagonal,
 * and stops as soon as either coordinate reaches size.
 *
 * @param size  edge length of the (square) patch
 */
void build_decopy_matrix(S32 size)
{
    S32 col = 0;
    S32 row = 0;
    S32 order = 0;

    // FALSE: the next move is a single step along an edge.
    // TRUE:  the next move continues along the current diagonal.
    BOOL on_diagonal = FALSE;
    // Edge step: TRUE prefers moving right, FALSE prefers moving down.
    // Diagonal:  TRUE moves up-right, FALSE moves down-left.
    BOOL heading_right = TRUE;

    while (col < size && row < size)
    {
        gDeCopyMatrix[row*size + col] = order;
        ++order;

        if (on_diagonal)
        {
            if (heading_right)
            {
                ++col;
                --row;
                if (col == size - 1 || row == 0)
                {
                    on_diagonal = FALSE;
                }
            }
            else
            {
                --col;
                ++row;
                if (col == 0 || row == size - 1)
                {
                    on_diagonal = FALSE;
                }
            }
            continue;
        }

        // Edge step: move one cell in the preferred direction, or in the
        // other direction when already on the far border. Then flip the
        // preference and start a diagonal run.
        if (heading_right)
        {
            if (col < size - 1)
            {
                ++col;
            }
            else
            {
                ++row;
            }
            heading_right = FALSE;
        }
        else
        {
            if (row < size - 1)
            {
                ++row;
            }
            else
            {
                ++col;
            }
            heading_right = TRUE;
        }
        on_diagonal = TRUE;
    }
}

/**
 * Prepares the decompressor lookup tables for patches of the given size.
 *
 * Does nothing when the tables were already built for this size; otherwise
 * records the size and rebuilds the dequantize table, the cosine table and
 * the zigzag copy matrix.
 *
 * @param size  edge length of the (square) patch
 */
void init_patch_decompressor(S32 size)
{
    if (size == gCurrentDeSize)
    {
        // Tables are already valid for this size.
        return;
    }

    gCurrentDeSize = size;
    build_patch_dequantize_table(size);
    setup_patch_icosines(size);
    build_decopy_matrix(size);
}

/**
 * Inverse DCT of a single row.
 *
 * Reads the coefficients of row `line` from `linein` and writes the
 * reconstructed samples to the same row of `lineout`. For each output
 * sample n the result is
 *
 *     scale * ( OO_SQRT2*c[0] + sum over u >= 1 of c[u]*gPatchICosines[u*size + n] )
 *
 * With _PATCH_SIZE_16_AND_32_ONLY defined, the row length is
 * NORMAL_PATCH_SIZE, the sum is unrolled over 16 coefficients and scale is
 * 2/16. Otherwise the row length is gGOPP->patch_size and scale is 2/size.
 *
 * @param linein   source matrix, row-major
 * @param lineout  destination matrix, same layout as linein
 * @param line     index of the row to transform
 */
inline void idct_line(F32 *linein, F32 *lineout, S32 line)
{
    S32 n;
    F32 total;
    F32 *pcp = gPatchICosines;

#ifdef _PATCH_SIZE_16_AND_32_ONLY
    F32 oosob = 2.f/16.f;
    S32 line_size = line*NORMAL_PATCH_SIZE;
    F32 *tlinein, *tpcp;


    for (n = 0; n < NORMAL_PATCH_SIZE; n++)
    {
        // tpcp walks down column n of the cosine table, one row per term;
        // tlinein walks along the coefficients of the selected row.
        tpcp = pcp + n;
        tlinein = linein + line_size;

        total = OO_SQRT2*(*(tlinein++));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));

        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));

        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));

        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE));
        total += *(tlinein)*(*(tpcp += NORMAL_PATCH_SIZE));

        *(lineout + line_size + n) = total*oosob;
    }
#else
    // size must be declared before the scale factor that is derived from it.
    S32 size = gGOPP->patch_size;
    F32 oosob = 2.f/size;
    S32 line_size = line*size;
    S32 u;
    for (n = 0; n < size; n++)
    {
        total = OO_SQRT2*linein[line_size];
        for (u = 1; u < size; u++)
        {
            total += linein[line_size + u]*pcp[u*size+n];
        }
        lineout[line_size + n] = total*oosob;
    }
#endif
}

/**
 * Inverse DCT of a single row of a large patch, using every coefficient in
 * the row.
 *
 * Reads row `line` of `linein` (row length LARGE_PATCH_SIZE) and writes the
 * reconstructed samples to the same row of `lineout`. Each output sample x is
 *
 *     (2/32) * ( OO_SQRT2*c[0] + sum over u >= 1 of c[u]*gPatchICosines[u*LARGE_PATCH_SIZE + x] )
 *
 * with the terms accumulated in ascending order of u.
 *
 * @param linein   source matrix, row-major
 * @param lineout  destination matrix, same layout as linein
 * @param line     index of the row to transform
 */
inline void idct_line_large_slow(F32 *linein, F32 *lineout, S32 line)
{
    const F32 scale = 2.f/32.f;
    const S32 row_start = line*LARGE_PATCH_SIZE;
    const F32 *coefs = linein + row_start;
    F32 *samples = lineout + row_start;

    for (S32 x = 0; x < LARGE_PATCH_SIZE; ++x)
    {
        // The DC term is weighted by OO_SQRT2 instead of a table entry.
        F32 sum = OO_SQRT2*coefs[0];

        for (S32 freq = 1; freq < LARGE_PATCH_SIZE; ++freq)
        {
            sum += coefs[freq]*gPatchICosines[freq*LARGE_PATCH_SIZE + x];
        }

        samples[x] = sum*scale;
    }
}

// Nota Bene: assumes that coefficients beyond 128 are 0!

/**
 * Inverse DCT of a single row of a large patch, reduced variant.
 *
 * Same layout as idct_line_large_slow() (row length LARGE_PATCH_SIZE, scale
 * 2/32), but only the first NORMAL_PATCH_SIZE coefficients of the row are
 * summed; the remaining ones are treated as zero (see the note above).
 *
 * Each output sample n is
 *
 *     (2/32) * ( OO_SQRT2*c[0] + sum for u = 1 .. NORMAL_PATCH_SIZE-1 of
 *                                c[u]*gPatchICosines[u*LARGE_PATCH_SIZE + n] )
 *
 * @param linein   source matrix, row-major
 * @param lineout  destination matrix, same layout as linein
 * @param line     index of the row to transform
 */
void idct_line_large(F32 *linein, F32 *lineout, S32 line)
{
    S32 n, m;
    F32 total;
    F32 *pcp = gPatchICosines;

    F32 oosob = 2.f/32.f;
    S32 line_size = line*LARGE_PATCH_SIZE;
    F32 *tlinein, *tpcp;
    F32 *baselinein = linein + line_size;
    F32 *baselineout = lineout + line_size;


    for (n = 0; n < LARGE_PATCH_SIZE; n++)
    {
        // tpcp starts at column n of the cosine table and moves down one
        // table row per coefficient.
        tpcp = pcp++;
        tlinein = baselinein;

        total = OO_SQRT2*(*(tlinein++));
        // Every term, including the last, must advance to its own cosine
        // row; a loop (as in idct_column_large) keeps that uniform.
        for (m = 1; m < NORMAL_PATCH_SIZE; m++)
            total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE));

        *baselineout++ = total*oosob;
    }
}

/**
 * Inverse DCT of a single column (no scaling applied).
 *
 * Reads the coefficients of column `column` from `linein` and writes the
 * reconstructed samples to the same column of `lineout`. Output row n gets
 *
 *     OO_SQRT2*c[0] + sum over u >= 1 of c[u]*gPatchICosines[u*size + n]
 *
 * accumulated in ascending order of u. With _PATCH_SIZE_16_AND_32_ONLY
 * defined, the row stride is NORMAL_PATCH_SIZE for the inputs and 16 for the
 * output; otherwise it is gGOPP->patch_size throughout.
 *
 * @param linein   source matrix, row-major
 * @param lineout  destination matrix, row-major
 * @param column   index of the column to transform
 */
inline void idct_column(F32 *linein, F32 *lineout, S32 column)
{
    const F32 *cosines = gPatchICosines;

#ifdef _PATCH_SIZE_16_AND_32_ONLY
    for (S32 out_row = 0; out_row < NORMAL_PATCH_SIZE; ++out_row)
    {
        // coef steps down the selected column of the input; basis steps down
        // column out_row of the cosine table, both one row per term.
        const F32 *coef = linein + column;
        const F32 *basis = cosines + out_row;

        F32 sum = OO_SQRT2*(*coef);
        for (S32 freq = 1; freq < NORMAL_PATCH_SIZE; ++freq)
        {
            coef += NORMAL_PATCH_SIZE;
            basis += NORMAL_PATCH_SIZE;
            sum += (*coef)*(*basis);
        }

        // out_row<<4: output rows are 16 entries apart.
        lineout[(out_row<<4) + column] = sum;
    }

#else
    const S32 size = gGOPP->patch_size;

    for (S32 out_row = 0; out_row < size; ++out_row)
    {
        F32 sum = OO_SQRT2*linein[column];
        for (S32 freq = 1; freq < size; ++freq)
        {
            const S32 row_base = freq*size;
            sum += linein[row_base + column]*cosines[row_base+out_row];
        }
        lineout[size*out_row + column] = sum;
    }
#endif
}

/**
 * Inverse DCT of a single column of a large patch, using every coefficient
 * in the column (no scaling applied).
 *
 * Reads column `column` of `linein` (row stride LARGE_PATCH_SIZE) and writes
 * the reconstructed samples to the same column of `lineout` (rows 32 entries
 * apart). Output row n gets
 *
 *     OO_SQRT2*c[0] + sum over u >= 1 of c[u]*gPatchICosines[u*LARGE_PATCH_SIZE + n]
 *
 * accumulated in ascending order of u.
 *
 * @param linein   source matrix, row-major
 * @param lineout  destination matrix, row-major
 * @param column   index of the column to transform
 */
inline void idct_column_large_slow(F32 *linein, F32 *lineout, S32 column)
{
    for (S32 out_row = 0; out_row < LARGE_PATCH_SIZE; ++out_row)
    {
        // coef steps down the selected column of the input; basis steps down
        // column out_row of the cosine table, both one row per term.
        const F32 *coef = linein + column;
        const F32 *basis = gPatchICosines + out_row;

        F32 sum = OO_SQRT2*(*coef);
        for (S32 freq = 1; freq < LARGE_PATCH_SIZE; ++freq)
        {
            coef += LARGE_PATCH_SIZE;
            basis += LARGE_PATCH_SIZE;
            sum += (*coef)*(*basis);
        }

        // out_row<<5: output rows are 32 entries apart.
        lineout[(out_row<<5) + column] = sum;
    }
}

// Nota Bene: assumes that coefficients beyond 128 are 0!

/**
 * Inverse DCT of a single column of a large patch, reduced variant
 * (no scaling applied).
 *
 * Same layout as idct_column_large_slow(), but only the first
 * NORMAL_PATCH_SIZE coefficients of the column are summed; the remaining
 * ones are treated as zero (see the note above).
 *
 * @param linein   source matrix, row-major with row stride LARGE_PATCH_SIZE
 * @param lineout  destination matrix, rows 32 entries apart
 * @param column   index of the column to transform
 */
void idct_column_large(F32 *linein, F32 *lineout, S32 column)
{
    for (S32 out_row = 0; out_row < LARGE_PATCH_SIZE; ++out_row)
    {
        // The DC term is weighted by OO_SQRT2 instead of a table entry.
        F32 sum = OO_SQRT2*linein[column];

        for (S32 freq = 1; freq < NORMAL_PATCH_SIZE; ++freq)
        {
            const S32 row_base = freq*LARGE_PATCH_SIZE;
            sum += linein[row_base + column]*gPatchICosines[row_base + out_row];
        }

        // out_row<<5: output rows are 32 entries apart.
        lineout[column + (out_row<<5)] = sum;
    }
}

/**
 * In-place two-dimensional inverse DCT of a patch.
 *
 * First transforms every column of `block` into a scratch buffer with
 * idct_column(), then transforms every row of the scratch buffer back into
 * `block` with idct_line(). The number of rows/columns is NORMAL_PATCH_SIZE
 * when _PATCH_SIZE_16_AND_32_ONLY is defined and gGOPP->patch_size otherwise.
 *
 * @param block  coefficient matrix on entry, reconstructed samples on return
 */
inline void idct_patch(F32 *block)
{
    F32 scratch[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE];

#ifdef _PATCH_SIZE_16_AND_32_ONLY
    const S32 edge = NORMAL_PATCH_SIZE;
#else
    const S32 edge = gGOPP->patch_size;
#endif

    // Column pass: block -> scratch.
    for (S32 col = 0; col < edge; ++col)
    {
        idct_column(block, scratch, col);
    }

    // Row pass: scratch -> block.
    for (S32 row = 0; row < edge; ++row)
    {
        idct_line(scratch, block, row);
    }
}

/**
 * In-place two-dimensional inverse DCT of a large patch.
 *
 * Transforms columns 0..31 of `block` into a scratch buffer with
 * idct_column_large_slow(), then transforms rows 0..31 of the scratch buffer
 * back into `block` with idct_line_large_slow().
 *
 * @param block  coefficient matrix on entry, reconstructed samples on return
 */
inline void idct_patch_large(F32 *block)
{
    F32 scratch[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE];

    // Column pass: block -> scratch.
    for (S32 col = 0; col < LARGE_PATCH_SIZE; ++col)
    {
        idct_column_large_slow(block, scratch, col);
    }

    // Row pass: scratch -> block.
    for (S32 row = 0; row < LARGE_PATCH_SIZE; ++row)
    {
        idct_line_large_slow(scratch, block, row);
    }
}

// NOTE(review): not referenced by any code visible in this file; presumably a
// dither amplitude consumed elsewhere -- confirm before changing or removing.
S32 gDitherNoise = 128;

/**
 * Decompresses one patch of quantized DCT coefficients into floats.
 *
 * Steps:
 *  1. Reorder the coefficients through gDeCopyMatrix and scale each one by
 *     its gPatchDequantizeTable weight.
 *  2. Run the 2D inverse DCT (idct_patch() when the group's patch_size is 16,
 *     idct_patch_large() otherwise).
 *  3. Map every sample s to s*mult + addval and store it in `patch`, where
 *     mult and addval are derived from the header's range, quant_wbits and
 *     dc_offset.
 *
 * The patch size and the output row stride are taken from gGOPP.
 *
 * @param patch   destination; rows are `stride` floats apart
 * @param cpatch  quantized coefficients, indexed through gDeCopyMatrix
 * @param ph      per-patch header (range, quant_wbits, dc_offset)
 */
void decompress_patch(F32 *patch, S32 *cpatch, LLPatchHeader *ph)
{
    LLGroupHeader *group = gGOPP;
    const S32 size = group->patch_size;
    const S32 stride = group->stride;

    // Quantization parameters from the patch header.
    const S32 prequant = (ph->quant_wbits >> 4) + 2;
    const S32 quantize = 1<<prequant;
    const F32 range = ph->range;
    const F32 hmin = ph->dc_offset;

    // Affine map applied to every reconstructed sample.
    const F32 ooq = 1.f/(F32)quantize;
    const F32 mult = ooq*range;
    const F32 addval = mult*(F32)(1<<(prequant - 1))+hmin;

    F32 block[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE];

    // Reorder and dequantize into a row-major coefficient block.
    const S32 coef_count = size*size;
    for (S32 k = 0; k < coef_count; ++k)
    {
        block[k] = cpatch[gDeCopyMatrix[k]]*gPatchDequantizeTable[k];
    }

    if (size == 16)
    {
        idct_patch(block);
    }
    else
    {
        idct_patch_large(block);
    }

    // Rescale and copy out, honouring the destination row stride.
    for (S32 row = 0; row < size; ++row)
    {
        F32 *dst = patch + row*stride;
        const F32 *src = block + row*size;
        for (S32 col = 0; col < size; ++col)
        {
            dst[col] = src[col]*mult+addval;
        }
    }
}


/**
 * Decompresses one patch of quantized DCT coefficients into the VZ component
 * of an array of vectors.
 *
 * Steps:
 *  1. Reorder the coefficients through gDeCopyMatrix and scale each one by
 *     its gPatchDequantizeTable weight.
 *  2. Run the 2D inverse DCT (idct_patch() when the group's patch_size is 16,
 *     idct_patch_large() otherwise).
 *  3. Map every sample s to s*mult + addval and store it in mV[VZ] of the
 *     corresponding vector; the other components are left untouched.
 *
 * The patch size and the output row stride are taken from gGOPP.
 *
 * @param v       destination vectors; rows are `stride` vectors apart
 * @param cpatch  quantized coefficients, indexed through gDeCopyMatrix
 * @param ph      per-patch header (range, quant_wbits, dc_offset)
 */
void decompress_patchv(LLVector3 *v, S32 *cpatch, LLPatchHeader *ph)
{
    LLGroupHeader *group = gGOPP;
    const S32 size = group->patch_size;
    const S32 stride = group->stride;

    // Quantization parameters from the patch header.
    const S32 prequant = (ph->quant_wbits >> 4) + 2;
    const S32 quantize = 1<<prequant;
    const F32 range = ph->range;
    const F32 hmin = ph->dc_offset;

    // Affine map applied to every reconstructed sample.
    const F32 ooq = 1.f/(F32)quantize;
    const F32 mult = ooq*range;
    const F32 addval = mult*(F32)(1<<(prequant - 1))+hmin;

    F32 block[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE];

    // Reorder and dequantize into a row-major coefficient block.
    const S32 coef_count = size*size;
    for (S32 k = 0; k < coef_count; ++k)
    {
        block[k] = cpatch[gDeCopyMatrix[k]]*gPatchDequantizeTable[k];
    }

    if (size == 16)
    {
        idct_patch(block);
    }
    else
    {
        idct_patch_large(block);
    }

    // Rescale and write the heights, honouring the destination row stride.
    for (S32 row = 0; row < size; ++row)
    {
        LLVector3 *dst = v + row*stride;
        const F32 *src = block + row*size;
        for (S32 col = 0; col < size; ++col)
        {
            dst[col].mV[VZ] = src[col]*mult+addval;
        }
    }
}