/**
 * @file llimage.cpp
 * @brief Base class for images.
 *
 * $LicenseInfo:firstyear=2001&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
 */

#include "linden_common.h"

#include "llimageworker.h"
#include "llimage.h"

#include "llmath.h"
#include "v4coloru.h"
#include "v3color.h"

#include "llimagebmp.h"
#include "llimagetga.h"
#include "llimagej2c.h"
#include "llimagejpeg.h"
#include "llimagepng.h"
#include "llimagedxt.h"
#include "llmemory.h"

#include <boost/preprocessor.hpp>

//..................................................................................
//..................................................................................
// Helper macros that generate the loop-unrolling functor templates.
//
// Argument lists are handed around as Boost.Preprocessor sequences that
// alternate type and name, for example (S32 *)(cx)(S32 *)(comp).
//
// NOTE(review): these macro names start with an underscore followed by an
// upper-case letter, a spelling the C++ standard reserves for the implementation.
//..................................................................................
// Picked by (element index mod 2): _0 discards the element (a type at an even
// index), _1 keeps it (a name at an odd index).
#define _UNROL_GEN_TPL_arg_0(arg)
#define _UNROL_GEN_TPL_arg_1(arg) arg

// Picked by a 0/1 flag: _0 emits nothing, _1 emits a comma.
#define _UNROL_GEN_TPL_comma_0
#define _UNROL_GEN_TPL_comma_1 BOOST_PP_COMMA()
//..................................................................................
// Per-element step for _UNROL_GEN_TPL_ARGS: emits the element only when its
// index is odd, followed by a comma when the index is odd and the element is
// not the last one in the sequence.
#define _UNROL_GEN_TPL_ARGS_macro(z,n,seq) \
    BOOST_PP_CAT(_UNROL_GEN_TPL_arg_, BOOST_PP_MOD(n, 2))(BOOST_PP_SEQ_ELEM(n, seq)) BOOST_PP_CAT(_UNROL_GEN_TPL_comma_, BOOST_PP_AND(BOOST_PP_MOD(n, 2), BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), BOOST_PP_SEQ_SIZE(seq))))

// Expands a (type)(name)(type)(name)... sequence to a call argument list:
// name, name, ...
#define _UNROL_GEN_TPL_ARGS(seq) \
    BOOST_PP_REPEAT(BOOST_PP_SEQ_SIZE(seq), _UNROL_GEN_TPL_ARGS_macro, seq)
//..................................................................................

// Per-element step for _UNROL_GEN_TPL_TYPE_ARGS: emits every element, followed
// by a comma after each odd-indexed element except the last.
#define _UNROL_GEN_TPL_TYPE_ARGS_macro(z,n,seq) \
    BOOST_PP_SEQ_ELEM(n, seq) BOOST_PP_CAT(_UNROL_GEN_TPL_comma_, BOOST_PP_AND(BOOST_PP_MOD(n, 2), BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), BOOST_PP_SEQ_SIZE(seq))))

// Expands a (type)(name)(type)(name)... sequence to a parameter declaration
// list: type name, type name, ...
#define _UNROL_GEN_TPL_TYPE_ARGS(seq) \
    BOOST_PP_REPEAT(BOOST_PP_SEQ_SIZE(seq), _UNROL_GEN_TPL_TYPE_ARGS_macro, seq)
//..................................................................................
// BOOST_PP_REPEAT step that emits one unrolled iteration: a call to
// executor<n> with the argument names taken from seq.
#define _UNROLL_GEN_TPL_foreach_ee(z, n, seq) \
    executor<n>(_UNROL_GEN_TPL_ARGS(seq));

// Emits the specialization name<spec>. Its private executor<_idx> evaluates
// `operation` for one index, and its operator() calls executor<0> up to
// executor<spec - 1> in sequence.
#define _UNROLL_GEN_TPL(name, args_seq, operation, spec) \
    template<> struct name<spec> { \
    private: \
        template<S32 _idx> inline void executor(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { \
            BOOST_PP_SEQ_ENUM(operation) ; \
        } \
    public: \
        inline void operator()(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { \
            BOOST_PP_REPEAT(spec, _UNROLL_GEN_TPL_foreach_ee, args_seq) \
        } \
};
//..................................................................................
// BOOST_PP_SEQ_FOR_EACH adapter: unpacks (name)(args_seq)(operation) from
// `data` and forwards it, together with the current spec value, to
// _UNROLL_GEN_TPL.
#define _UNROLL_GEN_TPL_foreach_seq_macro(r, data, elem) \
    _UNROLL_GEN_TPL(BOOST_PP_SEQ_ELEM(0, data), BOOST_PP_SEQ_ELEM(1, data), BOOST_PP_SEQ_ELEM(2, data), elem)

// Public entry point. Declares the primary template `name` with an empty
// operator(), then one specialization for each value listed in spec_seq.
#define UNROLL_GEN_TPL(name, args_seq, operation, spec_seq) \
    /*general specialization - should not be implemented!*/ \
    template<U8> struct name { inline void operator()(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { /*static_assert(!"Should not be instantiated.");*/  } }; \
    BOOST_PP_SEQ_FOR_EACH(_UNROLL_GEN_TPL_foreach_seq_macro, (name)(args_seq)(operation), spec_seq)
//..................................................................................
//..................................................................................


//..................................................................................
// Generated unrolling loop templates with specializations
//
// Each line below declares one functor template and specializes it for 1, 3
// and 4 iterations. Inside the operation, _idx is the iteration index. The
// "example" comments show the plain loop each functor stands in for; the
// pointer names in an example are those of one call site and may differ from
// the functor's parameter names.
//..................................................................................
//example: for(c = 0; c < ch; ++c) cx[c] = comp[c] = 0;
UNROLL_GEN_TPL(uroll_zeroze_cx_comp, (S32 *)(cx)(S32 *)(comp), (cx[_idx] = comp[_idx] = 0), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) comp[c] >>= 4;
UNROLL_GEN_TPL(uroll_comp_rshftasgn_constval, (S32 *)(comp)(const S32)(cval), (comp[_idx] >>= cval), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) comp[c] = (cx[c] >> 5) * yap;
UNROLL_GEN_TPL(uroll_comp_asgn_cx_rshft_cval_all_mul_val, (S32 *)(comp)(S32 *)(cx)(const S32)(cval)(S32)(val), (comp[_idx] = (cx[_idx] >> cval) * val), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * Cy;
UNROLL_GEN_TPL(uroll_comp_plusasgn_cx_rshft_cval_all_mul_val, (S32 *)(comp)(S32 *)(cx)(const S32)(cval)(S32)(val), (comp[_idx] += (cx[_idx] >> cval) * val), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) comp[c] += pix[c] * info.xapoints[x];
UNROLL_GEN_TPL(uroll_inp_plusasgn_pix_mul_val, (S32 *)(comp)(const U8 *)(pix)(S32)(val), (comp[_idx] += pix[_idx] * val), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) cx[c] = pix[c] * info.xapoints[x];
UNROLL_GEN_TPL(uroll_inp_asgn_pix_mul_val, (S32 *)(comp)(const U8 *)(pix)(S32)(val), (comp[_idx] = pix[_idx] * val), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) comp[c] = ((cx[c] * info.yapoints[y]) + (comp[c] * (256 - info.yapoints[y]))) >> 16;
UNROLL_GEN_TPL(uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r, (S32 *)(comp)(S32 *)(cx)(S32)(apoint), (comp[_idx] = ((cx[_idx] * apoint) + (comp[_idx] * (256 - apoint))) >> 16), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) comp[c] = (comp[c] + pix[c] * info.yapoints[y]) >> 8;
UNROLL_GEN_TPL(uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r, (S32 *)(comp)(const U8 *)(pix)(S32)(apoint), (comp[_idx] = (comp[_idx] + pix[_idx] * apoint) >> 8), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) comp[c] = ((comp[c]*(256 - info.xapoints[x])) + ((cx[c] * info.xapoints[x]))) >> 12;
UNROLL_GEN_TPL(uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r, (S32 *)(comp)(S32)(apoint)(S32 *)(cx), (comp[_idx] = ((comp[_idx] * (256-apoint)) + (cx[_idx] * apoint)) >> 12), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) *dptr++ = comp[c]&0xff;
UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_comp_and_ff, (U8 *&)(dptr)(S32 *)(comp), (*dptr++ = comp[_idx]&0xff), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) *dptr++ = (sptr[info.xpoints[x]*ch + c])&0xff;
UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff, (U8 *&)(dptr)(const U8 *)(sptr)(S32)(apoint), (*dptr++ = sptr[apoint + _idx]&0xff), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff;
UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff, (U8 *&)(dptr)(S32 *)(comp)(const S32)(cval), (*dptr++ = (comp[_idx]>>cval)&0xff), (1)(3)(4));
//..................................................................................


/**
 * Lookup tables consumed by bilinear_scale() for an image with `ch`
 * interleaved channels. All tables are filled once, in the constructor.
 *
 *  - xpoints[x]   source column index sampled for destination column x
 *  - ystrides[y]  pointer to the first byte of the source row sampled for
 *                 destination row y
 *  - xapoints / yapoints
 *                 per-column / per-row blend data. When the axis is scaled up
 *                 an entry is a weight in [0, 255]. When the axis is scaled
 *                 down an entry packs two weights: the low 16 bits apply to
 *                 the first source sample, the high 16 bits to each further
 *                 full sample.
 *  - xup_yup      bit 0 is set when dstW >= srcW, bit 1 when dstH >= srcH
 */
template<U8 ch>
struct scale_info
{
public:
    std::vector<S32> xpoints;
    std::vector<const U8*> ystrides;
    std::vector<S32> xapoints, yapoints;
    S32 xup_yup;

public:
    // Loop-unrolling functor types bound to this channel count.
    using uroll_zeroze_cx_comp_t = uroll_zeroze_cx_comp<ch>;
    using uroll_comp_rshftasgn_constval_t = uroll_comp_rshftasgn_constval<ch>;
    using uroll_comp_asgn_cx_rshft_cval_all_mul_val_t = uroll_comp_asgn_cx_rshft_cval_all_mul_val<ch>;
    using uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t = uroll_comp_plusasgn_cx_rshft_cval_all_mul_val<ch>;
    using uroll_inp_plusasgn_pix_mul_val_t = uroll_inp_plusasgn_pix_mul_val<ch>;
    using uroll_inp_asgn_pix_mul_val_t = uroll_inp_asgn_pix_mul_val<ch>;
    using uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r_t = uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r<ch>;
    using uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t = uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r<ch>;
    using uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t = uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r<ch>;
    using uroll_uref_dptr_inc_asgn_comp_and_ff_t = uroll_uref_dptr_inc_asgn_comp_and_ff<ch>;
    using uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff_t = uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff<ch>;
    using uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t = uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff<ch>;

public:
    // Builds every table for scaling a srcW x srcH image (rows srcStride
    // bytes apart, starting at src) to dstW x dstH.
    scale_info(const U8 *src, U32 srcW, U32 srcH, U32 dstW, U32 dstH, U32 srcStride)
        : xup_yup((dstW >= srcW) + ((dstH >= srcH) << 1))
    {
        const bool x_scaled_up = (xup_yup & 1) != 0;
        const bool y_scaled_up = (xup_yup & 2) != 0;

        calc_x_points(srcW, dstW);
        calc_y_strides(src, srcStride, srcH, dstH);
        calc_aa_points(srcW, dstW, x_scaled_up, xapoints);
        calc_aa_points(srcH, dstH, y_scaled_up, yapoints);
    }

private:
    //...........................................................................................
    // Fills xpoints by walking a 16.16 fixed-point source position across the
    // destination columns. The table holds dstW + 1 entries; only the first
    // dstW are assigned here.
    void calc_x_points(U32 srcW, U32 dstW)
    {
        xpoints.resize(dstW + 1);

        const S32 step = (srcW << 16) / dstW;
        S32 fixed_pos = 0;
        if (dstW >= srcW)
        {
            // Half-pixel offset when scaling up; may start out negative,
            // hence the clamp to 0 below.
            fixed_pos = 0x8000 * srcW / dstW - 0x8000;
        }

        for (U32 col = 0; col < dstW; ++col)
        {
            xpoints[col] = llmax(0, fixed_pos >> 16);
            fixed_pos += step;
        }
    }
    //...........................................................................................
    // Fills ystrides with a row pointer per destination row, using the same
    // fixed-point walk as calc_x_points. The table holds dstH + 1 entries;
    // only the first dstH are assigned here.
    void calc_y_strides(const U8 *src, U32 srcStride, U32 srcH, U32 dstH)
    {
        ystrides.resize(dstH + 1);

        const S32 step = (srcH << 16) / dstH;
        S32 fixed_pos = 0;
        if (dstH >= srcH)
        {
            fixed_pos = 0x8000 * srcH / dstH - 0x8000;
        }

        for (U32 row = 0; row < dstH; ++row)
        {
            ystrides[row] = src + llmax(0, fixed_pos >> 16) * srcStride;
            fixed_pos += step;
        }
    }
    //...........................................................................................
    // Fills vp with one blend entry per destination sample along one axis.
    // See the struct comment for the two encodings selected by scale_up.
    void calc_aa_points(U32 srcSz, U32 dstSz, bool scale_up, std::vector<S32> &vp)
    {
        vp.resize(dstSz);

        if (!scale_up)
        {
            const S32 step = (srcSz << 16) / dstSz;
            const S32 Cp = ((dstSz << 14) / srcSz) + 1;
            U32 fixed_pos = 0;

            for (U32 k = 0; k < dstSz; ++k)
            {
                const S32 ap = ((0x100 - ((fixed_pos >> 8) & 0xff)) * Cp) >> 8;
                vp[k] = ap | (Cp << 16);
                fixed_pos += step;
            }
            return;
        }

        const S32 step = (srcSz << 16) / dstSz;
        S32 fixed_pos = 0x8000 * srcSz / dstSz - 0x8000;

        for (U32 k = 0; k < dstSz; ++k, fixed_pos += step)
        {
            // Stored unsigned, so a negative position also lands in the
            // "no blending" case below.
            const U32 src_index = fixed_pos >> 16;

            if (src_index >= (srcSz - 1))
            {
                vp[k] = 0;
            }
            else
            {
                vp[k] = (fixed_pos >> 8) - ((fixed_pos >> 8) & 0xffffff00);
            }
        }
    }
};


/**
 * Resamples an image of interleaved 8-bit samples with `ch` channels per
 * pixel from src (srcW x srcH) into dst (dstW x dstH).
 *
 * @param src        first byte of the source image
 * @param srcW       source width in pixels
 * @param srcH       source height in pixels
 * @param srcStride  value added to a source byte pointer to step down one row
 * @param dst        first byte of the destination image
 * @param dstW       destination width in pixels
 * @param dstH       destination height in pixels
 * @param dstStride  value added to dst per destination row
 *
 * A scale_info<ch> is built first; its xup_yup field then selects one of four
 * code paths (both axes up, x up / y down, x down / y up, both axes down).
 * The per-channel loops are expressed through the uroll_* functors. Those are
 * specialized only for 1, 3 and 4 channels; for any other `ch` the primary
 * templates' empty operator() would be picked and no output would be written.
 */
template<U8 ch>
inline void bilinear_scale(
    const U8 *src, U32 srcW, U32 srcH, U32 srcStride
    , U8 *dst, U32 dstW, U32 dstH, U32 dstStride
    )
{
    typedef scale_info<ch> scale_info_t;

    scale_info_t info(src, srcW, srcH, dstW, dstH, srcStride);

    const U8 *sptr;
    U8 *dptr;
    U32 x, y;
    const U8 *pix;

    // Per-channel accumulators: comp is the value being built for the output
    // pixel, cx is a second accumulator blended into comp.
    S32 cx[ch], comp[ch];


    if(3 == info.xup_yup)
    { //scale x/y - up
        for(y = 0; y < dstH; ++y)
        {
            dptr = dst + (y * dstStride);
            sptr = info.ystrides[y];

            if(0 < info.yapoints[y])
            {
                for(x = 0; x < dstW; ++x)
                {
                    //for(c = 0; c < ch; ++c) cx[c] = comp[c] = 0;
                    typename scale_info_t::uroll_zeroze_cx_comp_t()(cx, comp);

                    if(0 < info.xapoints[x])
                    {
                        // Blend four neighbours: this pixel, the one to its
                        // right, and the same two on the next row.
                        pix = info.ystrides[y] + info.xpoints[x] * ch;

                        //for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.xapoints[x]);
                        typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256 - info.xapoints[x]);

                        pix += ch;

                        //for(c = 0; c < ch; ++c) comp[c] += pix[c] * info.xapoints[x];
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, info.xapoints[x]);

                        pix += srcStride;

                        //for(c = 0; c < ch; ++c) cx[c] = pix[c] * info.xapoints[x];
                        typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, info.xapoints[x]);

                        pix -= ch;

                        //for(c = 0; c < ch; ++c) {
                        //  cx[c] += pix[c] * (256 - info.xapoints[x]);
                        //  comp[c] = ((cx[c] * info.yapoints[y]) + (comp[c] * (256 - info.yapoints[y]))) >> 16;
                        //  *dptr++ = comp[c]&0xff;
                        //}
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, 256 - info.xapoints[x]);
                        typename scale_info_t::uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r_t()(comp, cx, info.yapoints[y]);
                        typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp);
                    }
                    else
                    {
                        // No horizontal weight: blend this pixel with the one
                        // directly below it.
                        pix = info.ystrides[y] + info.xpoints[x] * ch;

                        //for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.yapoints[y]);
                        typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256-info.yapoints[y]);

                        pix += srcStride;

                        //for(c = 0; c < ch; ++c) {
                        //  comp[c] = (comp[c] + pix[c] * info.yapoints[y]) >> 8;
                        //  *dptr++ = comp[c]&0xff;
                        //}
                        typename scale_info_t::uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t()(comp, pix, info.yapoints[y]);
                        typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp);
                    }
                }
            }
            else
            {
                for(x = 0; x < dstW; ++x)
                {
                    if(0 < info.xapoints[x])
                    {
                        pix = info.ystrides[y] + info.xpoints[x] * ch;

                        // NOTE(review): both terms below read the same source
                        // pixel, so the result equals pix[c]. The four-neighbour
                        // path above advances pix by ch between the two terms;
                        // confirm whether a `pix += ch` is missing here.
                        //for(c = 0; c < ch; ++c) {
                        //  comp[c] = pix[c] * (256 - info.xapoints[x]);
                        //  comp[c] = (comp[c] + pix[c] * info.xapoints[x]) >> 8;
                        //  *dptr++ = comp[c]&0xff;
                        //}
                        typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256 - info.xapoints[x]);
                        typename scale_info_t::uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t()(comp, pix, info.xapoints[x]);
                        typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp);
                    }
                    else
                    {
                        // Neither weight is set: copy the source pixel.
                        //for(c = 0; c < ch; ++c) *dptr++ = (sptr[info.xpoints[x]*ch + c])&0xff;
                        typename scale_info_t::uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff_t()(dptr, sptr, info.xpoints[x]*ch);
                    }
                }
            }
        }
    }
    else if(info.xup_yup == 1)
    { //scaling down vertically
        S32 Cy, j;
        S32 yap;

        for(y = 0; y < dstH; y++)
        {
            // Unpack the row entry: Cy is the weight of each full source row,
            // yap the weight of the first one.
            Cy = info.yapoints[y] >> 16;
            yap = info.yapoints[y] & 0xffff;

            dptr = dst + (y * dstStride);

            for(x = 0; x < dstW; x++)
            {
                pix = info.ystrides[y] + info.xpoints[x] * ch;

                //for(c = 0; c < ch; ++c) comp[c] = pix[c] * yap;
                typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, yap);

                pix += srcStride;

                // j counts down the weight still to hand out from a total of
                // 1 << 14; whatever is left after the loop goes to one final row.
                for(j = (1 << 14) - yap; j > Cy; j -= Cy, pix += srcStride)
                {
                    //for(c = 0; c < ch; ++c) comp[c] += pix[c] * Cy;
                    typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, Cy);
                }

                if(j > 0)
                {
                    //for(c = 0; c < ch; ++c) comp[c] += pix[c] * j;
                    typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, j);
                }

                if(info.xapoints[x] > 0)
                {
                    // Repeat the vertical accumulation for the column to the
                    // right, then blend the two columns horizontally.
                    pix = info.ystrides[y] + info.xpoints[x]*ch + ch;
                    //for(c = 0; c < ch; ++c) cx[c] = pix[c] * yap;
                    typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, yap);

                    pix += srcStride;
                    for(j = (1 << 14) - yap; j > Cy; j -= Cy)
                    {
                        //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cy;
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cy);
                        pix += srcStride;
                    }

                    if(j > 0)
                    {
                        //for(c = 0; c < ch; ++c) cx[c] += pix[c] * j;
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, j);
                    }

                    //for(c = 0; c < ch; ++c) comp[c] = ((comp[c]*(256 - info.xapoints[x])) + ((cx[c] * info.xapoints[x]))) >> 12;
                    typename scale_info_t::uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t()(comp, info.xapoints[x], cx);
                }
                else
                {
                    //for(c = 0; c < ch; ++c) comp[c] >>= 4;
                    typename scale_info_t::uroll_comp_rshftasgn_constval_t()(comp, 4);
                }

                //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff;
                typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 10);
            }
        }
    }
    else if(info.xup_yup == 2)
    { // scaling down horizontally
        S32 Cx, j;
        S32 xap;

        for(y = 0; y < dstH; y++)
        {
            dptr = dst + (y * dstStride);

            for(x = 0; x < dstW; x++)
            {
                // Unpack the column entry: Cx is the weight of each full source
                // column, xap the weight of the first one.
                Cx = info.xapoints[x] >> 16;
                xap = info.xapoints[x] & 0xffff;

                pix = info.ystrides[y] + info.xpoints[x] * ch;

                //for(c = 0; c < ch; ++c) comp[c] = pix[c] * xap;
                typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, xap);

                pix+=ch;
                for(j = (1 << 14) - xap; j > Cx; j -= Cx)
                {
                    //for(c = 0; c < ch; ++c) comp[c] += pix[c] * Cx;
                    typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, Cx);
                    pix+=ch;
                }

                if(j > 0)
                {
                    //for(c = 0; c < ch; ++c) comp[c] += pix[c] * j;
                    typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, j);
                }

                if(info.yapoints[y] > 0)
                {
                    // Repeat the horizontal accumulation on the next row, then
                    // blend the two rows vertically.
                    pix = info.ystrides[y] + info.xpoints[x]*ch + srcStride;
                    //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap;
                    typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap);

                    pix+=ch;
                    for(j = (1 << 14) - xap; j > Cx; j -= Cx)
                    {
                        //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx;
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx);
                        pix+=ch;
                    }

                    if(j > 0)
                    {
                        //for(c = 0; c < ch; ++c) cx[c] += pix[c] * j;
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, j);
                    }

                    //for(c = 0; c < ch; ++c) comp[c] = ((comp[c] * (256 - info.yapoints[y])) + ((cx[c] * info.yapoints[y]))) >> 12;
                    typename scale_info_t::uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t()(comp, info.yapoints[y], cx);
                }
                else
                {
                    //for(c = 0; c < ch; ++c) comp[c] >>= 4;
                    typename scale_info_t::uroll_comp_rshftasgn_constval_t()(comp, 4);
                }

                //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff;
                typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 10);
            }
        }
    }
    else
    { //scale x/y - down
        S32 Cx, Cy, i, j;
        S32 xap, yap;

        for(y = 0; y < dstH; y++)
        {
            Cy = info.yapoints[y] >> 16;
            yap = info.yapoints[y] & 0xffff;

            dptr = dst + (y * dstStride);
            for(x = 0; x < dstW; x++)
            {
                Cx = info.xapoints[x] >> 16;
                xap = info.xapoints[x] & 0xffff;

                // sptr tracks the start of the next source row to accumulate;
                // pix walks along the current one.
                sptr = info.ystrides[y] + info.xpoints[x] * ch;
                pix = sptr;
                sptr += srcStride;

                //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap;
                typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap);

                pix+=ch;
                for(i = (1 << 14) - xap; i > Cx; i -= Cx)
                {
                    //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx;
                    typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx);
                    pix+=ch;
                }

                if(i > 0)
                {
                    //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i;
                    typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i);
                }

                //for(c = 0; c < ch; ++c) comp[c] = (cx[c] >> 5) * yap;
                typename scale_info_t::uroll_comp_asgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, yap);

                // Every further full-weight row: accumulate it horizontally
                // into cx, then add it to comp with weight Cy.
                for(j = (1 << 14) - yap; j > Cy; j -= Cy)
                {
                    pix = sptr;
                    sptr += srcStride;

                    //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap;
                    typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap);

                    pix+=ch;
                    for(i = (1 << 14) - xap; i > Cx; i -= Cx)
                    {
                        //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx;
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx);
                        pix+=ch;
                    }

                    if(i > 0)
                    {
                        //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i;
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i);
                    }

                    //for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * Cy;
                    typename scale_info_t::uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, Cy);
                }

                // Final row, weighted by the remainder j.
                if(j > 0)
                {
                    pix = sptr;
                    sptr += srcStride;

                    //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap;
                    typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap);

                    pix+=ch;
                    for(i = (1 << 14) - xap; i > Cx; i -= Cx)
                    {
                        //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx;
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx);
                        pix+=ch;
                    }

                    if(i > 0)
                    {
                        //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i;
                        typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i);
                    }

                    //for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * j;
                    typename scale_info_t::uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, j);
                }

                //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>23)&0xff;
                typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 23);
            }
        }
    } //else
}

// Runtime dispatcher for the bilinear_scale<ch> template: picks the
// instantiation that matches the channel count. Source and destination are
// expected to have the same number of channels (asserted). Only 1, 3 and 4
// channels are handled; any other count asserts and leaves dst untouched.
static void bilinear_scale(const U8 *src, U32 srcW, U32 srcH, U32 srcCh, U32 srcStride, U8 *dst, U32 dstW, U32 dstH, U32 dstCh, U32 dstStride)
{
    llassert(srcCh == dstCh);

    if (1 == srcCh)
    {
        bilinear_scale<1>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride);
    }
    else if (3 == srcCh)
    {
        bilinear_scale<3>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride);
    }
    else if (4 == srcCh)
    {
        bilinear_scale<4>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride);
    }
    else
    {
        llassert(!"Implement if need");
    }
}

//---------------------------------------------------------------------------
// LLImage
//---------------------------------------------------------------------------

//static
// Message stored by LLImage::setLastError() and read back by
// LLImage::getLastThreadError(). Declared thread_local, so every thread has
// its own copy.
thread_local std::string LLImage::sLastThreadErrorMessage;
// Initial values; both are overwritten by LLImage::initClass().
bool LLImage::sUseNewByteRange = false;
S32  LLImage::sMinimalReverseByteRangePercent = 75;

//static
// Stores the two class-wide settings passed in by the caller.
//   use_new_byte_range                  -> sUseNewByteRange
//   minimal_reverse_byte_range_percent  -> sMinimalReverseByteRangePercent
void LLImage::initClass(bool use_new_byte_range, S32 minimal_reverse_byte_range_percent)
{
    sMinimalReverseByteRangePercent = minimal_reverse_byte_range_percent;
    sUseNewByteRange = use_new_byte_range;
}

//static
// Counterpart of initClass(); currently has nothing to release.
void LLImage::cleanupClass()
{
}

//static
// Returns the calling thread's last recorded error message, or the text
// "No Error" when none has been recorded.
const std::string& LLImage::getLastThreadError()
{
    static const std::string noerr("No Error");

    if (sLastThreadErrorMessage.empty())
    {
        return noerr;
    }
    return sLastThreadErrorMessage;
}

//static
// Records `message` as the last error for the calling thread (the backing
// string is thread_local). Passing an empty string clears it, since
// getLastThreadError() treats an empty message as "No Error".
void LLImage::setLastError(const std::string& message)
{
    sLastThreadErrorMessage = message;
}

//---------------------------------------------------------------------------
// LLImageBase
//---------------------------------------------------------------------------

// Creates an empty image: no buffer, zero size, zero dimensions and
// components, and both status flags cleared.
LLImageBase::LLImageBase()
    : mData(nullptr)
    , mDataSize(0)
    , mWidth(0)
    , mHeight(0)
    , mComponents(0)
    , mBadBufferAllocation(false)
    , mAllowOverSize(false)
{
}

// virtual
// Frees the pixel buffer.
LLImageBase::~LLImageBase()
{
    // deleteData() is a virtual function, but a call made from a destructor
    // resolves to this class's own version (LLImageBase::deleteData), never
    // to an override in a derived class.
    deleteData(); // virtual
}

// virtual
void LLImageBase::dump()
{
    LL_INFOS() << "LLImageBase mComponents " << mComponents
        << " mData " << mData
        << " mDataSize " << mDataSize
        << " mWidth " << mWidth
        << " mHeight " << mHeight
        << LL_ENDL;
}

// virtual
void LLImageBase::sanityCheck()
{
    if (mWidth > MAX_IMAGE_SIZE
        || mHeight > MAX_IMAGE_SIZE
        || mDataSize > (S32)MAX_IMAGE_DATA_SIZE
        || mComponents > (S8)MAX_IMAGE_COMPONENTS
        )
    {
        LL_ERRS() << "Failed LLImageBase::sanityCheck "
               << "width " << mWidth
               << "height " << mHeight
               << "datasize " << mDataSize
               << "components " << mComponents
               << "data " << mData
               << LL_ENDL;
    }
}

// virtual
// Frees the aligned pixel buffer and resets the pointer and recorded size.
// Width, height and component count are left as they are.
void LLImageBase::deleteData()
{
    ll_aligned_free_16(mData);
    mData = nullptr;
    mDataSize = 0;
}

// virtual
// Ensures the image owns a 16-byte-aligned buffer of `size` bytes.
//
// A negative `size` means "use mWidth * mHeight * mComponents". The existing
// buffer is kept when it already has the requested size. On any failure
// (non-positive computed size, size outside [1, MAX_BUFFER_SIZE] without
// mAllowOverSize, or allocation failure) the buffer is released, width and
// height are zeroed, mBadBufferAllocation is set and NULL is returned.
//
// Returns the buffer pointer (mData).
U8* LLImageBase::allocateData(S32 size)
{
    // Original author's note at this spot: "make this function thread-safe."
    // No locking is performed inside this function itself.
    static const U32 MAX_BUFFER_SIZE = 4096 * 4096 * 16; //256 MB
    mBadBufferAllocation = false;

    // Step 1: derive the size from the dimensions when none was given.
    if (size < 0)
    {
        size = mWidth * mHeight * mComponents;
        mBadBufferAllocation = (size <= 0);
        if (mBadBufferAllocation)
        {
            LL_WARNS() << llformat("LLImageBase::allocateData called with bad dimensions: %dx%dx%d",mWidth,mHeight,(S32)mComponents) << LL_ENDL;
        }
    }

    // Step 2: range check; an out-of-range size is tolerated only when
    // mAllowOverSize is set.
    if (!mBadBufferAllocation)
    {
        const bool out_of_range = (size < 1 || size > MAX_BUFFER_SIZE);
        if (out_of_range)
        {
            LL_INFOS() << "width: " << mWidth << " height: " << mHeight << " components: " << mComponents << LL_ENDL ;
            if (!mAllowOverSize)
            {
                LL_WARNS() << "LLImageBase::allocateData: bad size: " << size << LL_ENDL;
                mBadBufferAllocation = true;
            }
            else
            {
                LL_INFOS() << "Oversize: " << size << LL_ENDL ;
            }
        }
    }

    // Step 3: (re)allocate unless a buffer of exactly this size already exists.
    if (!mBadBufferAllocation)
    {
        const bool need_new_buffer = (!mData || size != mDataSize);
        if (need_new_buffer)
        {
            deleteData(); // virtual
            mData = (U8*)ll_aligned_malloc_16(size);
            if (!mData)
            {
                LL_WARNS() << "Failed to allocate image data size [" << size << "]" << LL_ENDL;
                mBadBufferAllocation = true;
            }
        }
    }

    // Step 4: on failure, leave the image empty.
    if (mBadBufferAllocation)
    {
        size = 0;
        mWidth = mHeight = 0;
        if (mData)
        {
            deleteData(); // virtual
            mData = NULL;
        }
    }

    mDataSize = size;
    return mData;
}

// virtual
// Replaces the buffer with a newly allocated one of `size` bytes, copying
// over min(old size, size) bytes of the existing contents.
//
// Returns the new buffer, or a null pointer when allocation fails; in that
// case the existing buffer and all members are left untouched. On success
// mBadBufferAllocation is cleared.
U8* LLImageBase::reallocateData(S32 size)
{
    U8* const fresh = (U8*)ll_aligned_malloc_16(size);
    if (fresh)
    {
        if (mData)
        {
            const S32 keep = llmin(mDataSize, size);
            memcpy(fresh, mData, keep);    /* Flawfinder: ignore */
            ll_aligned_free_16(mData) ;
        }

        mData = fresh;
        mDataSize = size;
        mBadBufferAllocation = false;
        return mData;
    }

    LL_WARNS() << "Out of memory in LLImageBase::reallocateData" << LL_ENDL;
    return nullptr;
}

// Read-only access to the pixel buffer. Logs a warning and returns a null
// pointer when the last allocation was flagged as bad.
const U8* LLImageBase::getData() const
{
    if (!mBadBufferAllocation)
    {
        return mData;
    }

    LL_WARNS() << "Bad memory allocation for the image buffer!" << LL_ENDL ;
    return nullptr;
} // read only

// Writable access to the pixel buffer. Logs a warning and returns a null
// pointer when the last allocation was flagged as bad.
U8* LLImageBase::getData()
{
    if (!mBadBufferAllocation)
    {
        return mData;
    }

    LL_WARNS() << "Bad memory allocation for the image buffer!" << LL_ENDL;
    return nullptr;
}

// True when the image has no usable buffer: either the last allocation was
// flagged as bad or there is no buffer at all.
bool LLImageBase::isBufferInvalid() const
{
    if (mBadBufferAllocation)
    {
        return true;
    }
    return nullptr == mData;
}

// Records the image dimensions and component count. The pixel buffer is not
// allocated, resized or otherwise touched here.
void LLImageBase::setSize(S32 width, S32 height, S32 ncomponents)
{
    mComponents = ncomponents;
    mHeight = height;
    mWidth = width;
}

// Sets the dimensions first, then allocates the buffer through the virtual
// allocateData(). The dimensions must be stored beforehand because
// allocateData() derives the byte count from them when `size` is negative.
// Returns whatever allocateData() returns.
U8* LLImageBase::allocateDataSize(S32 width, S32 height, S32 ncomponents, S32 size)
{
    setSize(width, height, ncomponents);

    U8* const buffer = allocateData(size); // virtual
    return buffer;
}

//---------------------------------------------------------------------------
// LLImageRaw
//---------------------------------------------------------------------------

// Running count of LLImageRaw objects, adjusted by the LLImageRaw
// constructors and destructor. It is a plain S32, not an atomic.
S32 LLImageRaw::sRawImageCount = 0;

// Creates an empty raw image (no buffer) and counts it in sRawImageCount.
LLImageRaw::LLImageRaw()
    : LLImageBase()
{
    ++sRawImageCount;
}

// Creates a raw image with the given dimensions and allocates its buffer.
// The buffer contents are not initialized here. The allocation result is not
// checked; callers can use isBufferInvalid() afterwards.
LLImageRaw::LLImageRaw(U16 width, U16 height, S8 components)
    : LLImageBase()
{
    //llassert( S32(width) * S32(height) * S32(components) <= MAX_IMAGE_DATA_SIZE );
    allocateDataSize(width, height, components);
    ++sRawImageCount;
}

// Creates a raw image with the given dimensions and fills it with a copy of
// width * height * components bytes read from `data`. Nothing is copied when
// the allocation fails. `data` itself is not checked for null.
LLImageRaw::LLImageRaw(const U8* data, U16 width, U16 height, S8 components)
    : LLImageBase()
{
    if (allocateDataSize(width, height, components))
    {
        memcpy(getData(), data, width * height * components);
    }
    // Every constructor must count the instance, because the destructor
    // always decrements sRawImageCount.
    ++sRawImageCount;
}

// Creates a raw image from an existing pixel buffer.
//   no_copy == true : the image takes `data` as its own buffer through
//                     setDataAndSize(); nothing is copied.
//   no_copy == false: a new buffer is allocated and width * height *
//                     components bytes are copied from `data`; nothing is
//                     copied when the allocation fails.
LLImageRaw::LLImageRaw(U8 *data, U16 width, U16 height, S8 components, bool no_copy)
    : LLImageBase()
{
    if (!no_copy)
    {
        if (allocateDataSize(width, height, components))
        {
            memcpy(getData(), data, width*height*components);
        }
    }
    else
    {
        setDataAndSize(data, width, height, components);
    }

    ++sRawImageCount;
}

//LLImageRaw::LLImageRaw(const std::string& filename, bool j2c_lowest_mip_only)
//  : LLImageBase()
//{
//  createFromFile(filename, j2c_lowest_mip_only);
//}

// Releases the buffer and removes this instance from the counter.
LLImageRaw::~LLImageRaw()
{
    // NOTE: the deleteData() call made from ~LLImageBase() resolves to
    //       LLImageBase::deleteData(), NOT LLImageRaw::deleteData(),
    //       so the LLImageRaw version is invoked explicitly here.
    deleteData();
    sRawImageCount -= 1;
}

// virtual
// Forwards to LLImageBase::allocateData() while holding the image data lock.
U8* LLImageRaw::allocateData(S32 size)
{
    LLImageDataLock guard(this);

    return LLImageBase::allocateData(size);
}

// virtual
// Forwards to LLImageBase::reallocateData() while holding the image data lock.
U8* LLImageRaw::reallocateData(S32 size)
{
    LLImageDataLock guard(this);

    return LLImageBase::reallocateData(size);
}

// Resets the image to zero size and sets the base-class data pointer to
// null with size 0, all under the image data lock. Note that deleteData()
// is not called here.
void LLImageRaw::releaseData()
{
    LLImageDataLock guard(this);

    // Dimensions first, then the (now empty) buffer description.
    LLImageBase::setSize(0, 0, 0);
    LLImageBase::setDataAndSize(nullptr, 0);
}

// virtual
// Forwards to LLImageBase::deleteData() while holding the image data lock.
void LLImageRaw::deleteData()
{
    LLImageDataLock guard(this);

    LLImageBase::deleteData();
}

// Replaces the current buffer with 'data' and records the new dimensions.
// Nothing happens when 'data' is already the current buffer; otherwise the
// old buffer is deleted first. The byte size handed to the base class is
// width * height * components.
void LLImageRaw::setDataAndSize(U8 *data, S32 width, S32 height, S8 components)
{
    LLImageDataLock guard(this);

    if (getData() != data)
    {
        deleteData();

        LLImageBase::setSize(width, height, components);
        LLImageBase::setDataAndSize(data, width * height * components);
    }
}

// Ensures the image has the requested dimensions and a usable buffer.
// Existing pixels are kept only when nothing needs to change; otherwise the
// buffer is deleted and a new one allocated.
// Returns true when the image ends up with a valid buffer.
bool LLImageRaw::resize(U16 width, U16 height, S8 components)
{
    LLImageDataLock guard(this);

    const bool same_shape = (getWidth() == width)
                         && (getHeight() == height)
                         && (getComponents() == components);
    if (same_shape && !isBufferInvalid())
    {
        return true;
    }

    // Shape differs (or the buffer is unusable): start over with a new buffer.
    deleteData();
    allocateDataSize(width, height, components);

    return !isBufferInvalid();
}

// Copies a width x height rectangle of pixels from 'data' into this image
// with its first row at (x_pos, y_pos).
//   data      - source pixels, using this image's component count.
//   stride    - bytes between consecutive source rows; 0 means the rows are
//               tightly packed (width * components bytes each).
//   reverse_y - when true the source rows are consumed bottom-up.
// Returns false when either buffer is missing or when the rectangle does not
// fit inside this image; true once all rows have been copied.
bool LLImageRaw::setSubImage(U32 x_pos, U32 y_pos, U32 width, U32 height,
                             const U8 *data, U32 stride, bool reverse_y)
{
    LLImageDataLock lock(this);

    U8* dst_data = getData();
    if (!dst_data || !data)
    {
        return false;
    }

    const U32 dst_width = getWidth();
    const U32 dst_height = getHeight();
    const U32 components = getComponents();

    // Reject rectangles that would write outside the destination buffer.
    // Phrased as subtractions so the U32 sums cannot wrap around.
    if (width > dst_width || x_pos > dst_width - width ||
        height > dst_height || y_pos > dst_height - height)
    {
        LL_WARNS() << "Sub-image does not fit into the destination image" << LL_ENDL;
        return false;
    }

    const U32 row_bytes = width * components;
    const U32 src_stride = (stride == 0) ? row_bytes : stride;

    for (U32 i = 0; i < height; i++)
    {
        const U32 row = reverse_y ? height - 1 - i : i;
        const U32 from_offset = row * src_stride;
        const U32 to_offset = (y_pos + i) * dst_width + x_pos;
        memcpy(dst_data + to_offset * components,       /* Flawfinder: ignore */
                data + from_offset, row_bytes);
    }

    return true;
}

// Overwrites every pixel with the given color.
// For 1, 2 or 3 components only the leading r / r,g / r,g,b values are
// written per pixel; any other component count writes all four values.
void LLImageRaw::clear(U8 r, U8 g, U8 b, U8 a)
{
    llassert( getComponents() <= 4 );

    LLImageDataLock guard(this);

    // This is fairly bogus, but it'll do for now.
    if (isBufferInvalid())
    {
        LL_WARNS() << "Invalid image buffer" << LL_ENDL;
        return;
    }

    const U8 channels[4] = { r, g, b, a };
    const S32 comps = getComponents();
    const S32 bytes_per_pixel = (1 <= comps && comps <= 3) ? comps : 4;

    // The buffer is filled linearly, so the nesting order of the two loops
    // only determines how many pixels are visited.
    U8* cursor = getData();
    for (U32 col = 0; col < getWidth(); ++col)
    {
        for (U32 row = 0; row < getHeight(); ++row)
        {
            for (S32 c = 0; c < bytes_per_pixel; ++c)
            {
                *cursor++ = channels[c];
            }
        }
    }
}

// Reverses the order of the rows in the image
void LLImageRaw::verticalFlip()
{
    LLImageDataLock lock(this);

    S32 row_bytes = getWidth() * getComponents();
    llassert(row_bytes > 0);
    std::vector<U8> line_buffer(row_bytes);
    S32 mid_row = getHeight() / 2;
    for( S32 row = 0; row < mid_row; row++ )
    {
        U8* row_a_data = getData() + row * row_bytes;
        U8* row_b_data = getData() + (getHeight() - 1 - row) * row_bytes;
        memcpy( &line_buffer[0], row_a_data,  row_bytes );
        memcpy( row_a_data,  row_b_data,  row_bytes );
        memcpy( row_b_data,  &line_buffer[0], row_bytes );
    }
}


bool LLImageRaw::checkHasTransparentPixels()
{
    if (getComponents() != 4)
    {
        return false;
    }

    U8* data = getData();
    U32 pixels = getWidth() * getHeight();

    // check alpha channel for all 255
    for (U32 i = 0; i < pixels; ++i)
    {
        if (data[i * 4 + 3] != 255)
        {
            return true;
        }
    }

    return false;
}

bool LLImageRaw::optimizeAwayAlpha()
{
    LLImageDataLock lock(this);

    if (getComponents() == 4)
    {
        U8* data = getData();
        U32 pixels = getWidth() * getHeight();

        // check alpha channel for all 255
        for (U32 i = 0; i < pixels; ++i)
        {
            if (data[i * 4 + 3] != 255)
            {
                return false;
            }
        }

        // alpha channel is all 255, make a new copy of data without alpha channel
        U8* new_data = (U8*) ll_aligned_malloc_16(getWidth() * getHeight() * 3);

        for (U32 i = 0; i < pixels; ++i)
        {
            U32 di = i * 3;
            U32 si = i * 4;
            for (U32 j = 0; j < 3; ++j)
            {
                new_data[di+j] = data[si+j];
            }
        }

        setDataAndSize(new_data, getWidth(), getHeight(), 3);

        return true;
    }

    return false;
}

bool LLImageRaw::makeAlpha()
{
    if (getComponents() == 3)
    {
        U8* data = getData();
        U32 pixels = getWidth() * getHeight();

        // alpha channel doesn't exist, make a new copy of data with alpha channel
        U8* new_data = (U8*) ll_aligned_malloc_16(getWidth() * getHeight() * 4);

        for (U32 i = 0; i < pixels; ++i)
        {
            U32 di = i * 4;
            U32 si = i * 3;
            for (U32 j = 0; j < 3; ++j)
            {
                new_data[di+j] = data[si+j];
            }
        }

        setDataAndSize(new_data, getWidth(), getHeight(), 3);

        return true;
    }

    return false;
}

// Resizes the image so each dimension becomes the value computed by
// expandDimToPowerOfTwo() for it (bounded by max_dim). 'scale_image' is
// forwarded to scale().
void LLImageRaw::expandToPowerOfTwo(S32 max_dim, bool scale_image)
{
    LLImageDataLock guard(this);

    const S32 pot_width  = expandDimToPowerOfTwo(getWidth(), max_dim);
    const S32 pot_height = expandDimToPowerOfTwo(getHeight(), max_dim);

    scale( pot_width, pot_height, scale_image );
}

// Resizes the image so each dimension becomes the value computed by
// contractDimToPowerOfTwo() for it, with MIN_IMAGE_SIZE as the lower bound.
// 'scale_image' is forwarded to scale().
// NOTE(review): max_dim is accepted but not used by this function.
void LLImageRaw::contractToPowerOfTwo(S32 max_dim, bool scale_image)
{
    LLImageDataLock guard(this);

    const S32 pot_width  = contractDimToPowerOfTwo(getWidth(), MIN_IMAGE_SIZE);
    const S32 pot_height = contractDimToPowerOfTwo(getHeight(), MIN_IMAGE_SIZE);

    scale( pot_width, pot_height, scale_image );
}

// static
// Picks one of two neighbouring candidates obtained by repeatedly halving
// max_dim: the halving stops once the smaller candidate is <= curr_dim or has
// reached MIN_IMAGE_SIZE. The larger candidate is chosen only when curr_dim
// exceeds the smaller one by more than the threshold factor.
S32 LLImageRaw::biasedDimToPowerOfTwo(S32 curr_dim, S32 max_dim)
{
    // Strong bias towards rounding down (to save bandwidth).
    // A value of 1.5f would mean no bias.
    const F32 THRESHOLD = 1.75f;

    S32 upper = max_dim;
    S32 lower = max_dim;
    for ( ; (lower > curr_dim) && (lower > MIN_IMAGE_SIZE); lower >>= 1)
    {
        upper = lower;
    }

    const F32 overshoot = (F32)curr_dim / (F32)lower;
    if (overshoot > THRESHOLD)
    {
        return upper;
    }
    return lower;
}

// static
// Starting at MIN_IMAGE_SIZE, doubles the candidate until it is at least
// curr_dim or at least max_dim, and returns it.
S32 LLImageRaw::expandDimToPowerOfTwo(S32 curr_dim, S32 max_dim)
{
    S32 candidate = MIN_IMAGE_SIZE;
    for ( ; (candidate < curr_dim) && (candidate < max_dim); candidate <<= 1)
    {
        // doubling happens in the loop header
    }
    return candidate;
}

// static
// Starting at MAX_IMAGE_SIZE, halves the candidate until it is at most
// curr_dim or at most min_dim, and returns it.
S32 LLImageRaw::contractDimToPowerOfTwo(S32 curr_dim, S32 min_dim)
{
    S32 candidate = MAX_IMAGE_SIZE;
    for ( ; (candidate > curr_dim) && (candidate > min_dim); candidate >>= 1)
    {
        // halving happens in the loop header
    }
    return candidate;
}

// Rescales the image to the dimensions chosen by biasedDimToPowerOfTwo()
// for its current width and height.
void LLImageRaw::biasedScaleToPowerOfTwo(S32 max_dim)
{
    LLImageDataLock guard(this);

    const S32 target_width  = biasedDimToPowerOfTwo(getWidth(), max_dim);
    const S32 target_height = biasedDimToPowerOfTwo(getHeight(), max_dim);

    scale( target_width, target_height );
}

// static
// Integer-only evaluation of (U8)(255*(a/255.f)*(b/255.f) + 0.5f), i.e. the
// product of two 8-bit fractions. Thanks, Jim Blinn!
inline U8 LLImageRaw::fastFractionalMult( U8 a, U8 b )
{
    const U32 biased = a * b + 128;
    const U32 folded = biased + (biased >> 8);
    return U8(folded >> 8);
}


// Composites 'src' over this image. This image is expected to have 3
// components (nothing is done otherwise); src may have 3 or 4.
// A 3-component src is simply copied (scaled if the sizes differ), a
// 4-component src is alpha-blended (scaled if the sizes differ).
void LLImageRaw::composite( const LLImageRaw* src )
{
    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataSharedLock lockIn(src);
    LLImageDataLock lockOut(this);

    if (!validateSrcAndDst("LLImageRaw::composite", src, dst))
    {
        return;
    }

    llassert((3 == src->getComponents()) || (4 == src->getComponents()));
    llassert(3 == dst->getComponents());

    if( 3 != dst->getComponents() )
    {
        return;
    }

    const bool same_size = (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight());
    const bool src_is_opaque = (3 == src->getComponents());

    if( same_size )
    {
        // No scaling needed
        if( src_is_opaque )
        {
            copyUnscaled( src );  // alpha is one so just copy the data.
        }
        else
        {
            compositeUnscaled4onto3( src );
        }
        return;
    }

    if( src_is_opaque )
    {
        copyScaled( src );  // alpha is one so just copy the data.
    }
    else
    {
        compositeScaled4onto3( src );
    }
}


// Src and dst can be any size.  Src has 4 components.  Dst has 3 components.
void LLImageRaw::compositeScaled4onto3(const LLImageRaw* src)
{
    LL_INFOS() << "compositeScaled4onto3" << LL_ENDL;

    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataLock lock(this);

    llassert( (4 == src->getComponents()) && (3 == dst->getComponents()) );

    S32 temp_data_size = src->getWidth() * dst->getHeight() * src->getComponents();
    llassert_always(temp_data_size > 0);
    std::vector<U8> temp_buffer(temp_data_size);

    // Vertical: scale but no composite
    for( S32 col = 0; col < src->getWidth(); col++ )
    {
        copyLineScaled( src->getData() + (src->getComponents() * col), &temp_buffer[0] + (src->getComponents() * col), src->getHeight(), dst->getHeight(), src->getWidth(), src->getWidth() );
    }

    // Horizontal: scale and composite
    for( S32 row = 0; row < dst->getHeight(); row++ )
    {
        compositeRowScaled4onto3( &temp_buffer[0] + (src->getComponents() * src->getWidth() * row), dst->getData() + (dst->getComponents() * dst->getWidth() * row), src->getWidth(), dst->getWidth() );
    }
}


// Src and dst are same size.  Src has 4 components.  Dst has 3 components.
void LLImageRaw::compositeUnscaled4onto3( const LLImageRaw* src )
{
    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataLock lock(this);

    llassert( (3 == src->getComponents()) || (4 == src->getComponents()) );
    llassert( (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight()) );

    const U8* src_data = src->getData();
    U8* dst_data = dst->getData();
    S32 pixels = getWidth() * getHeight();
    while( pixels-- )
    {
        U8 alpha = src_data[3];
        if( alpha )
        {
            if( 255 == alpha )
            {
                dst_data[0] = src_data[0];
                dst_data[1] = src_data[1];
                dst_data[2] = src_data[2];
            }
            else
            {

                U8 transparency = 255 - alpha;
                dst_data[0] = fastFractionalMult( dst_data[0], transparency ) + fastFractionalMult( src_data[0], alpha );
                dst_data[1] = fastFractionalMult( dst_data[1], transparency ) + fastFractionalMult( src_data[1], alpha );
                dst_data[2] = fastFractionalMult( dst_data[2], transparency ) + fastFractionalMult( src_data[2], alpha );
            }
        }

        src_data += 4;
        dst_data += 3;
    }
}


void LLImageRaw::copyUnscaledAlphaMask( const LLImageRaw* src, const LLColor4U& fill)
{
    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataSharedLock lockIn(src);
    LLImageDataLock lockOut(this);

    if (!validateSrcAndDst("LLImageRaw::copyUnscaledAlphaMask", src, dst))
    {
        return;
    }

    llassert( 1 == src->getComponents() );
    llassert( 4 == dst->getComponents() );
    llassert( (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight()) );

    S32 pixels = getWidth() * getHeight();
    const U8* src_data = src->getData();
    U8* dst_data = dst->getData();
    for ( S32 i = 0; i < pixels; i++ )
    {
        dst_data[0] = fill.mV[0];
        dst_data[1] = fill.mV[1];
        dst_data[2] = fill.mV[2];
        dst_data[3] = src_data[0];
        src_data += 1;
        dst_data += 4;
    }
}


// Fill the buffer with a constant color
void LLImageRaw::fill( const LLColor4U& color )
{
    LLImageDataLock lock(this);

    if (isBufferInvalid())
    {
        LL_WARNS() << "Invalid image buffer" << LL_ENDL;
        return;
    }

    S32 pixels = getWidth() * getHeight();
    if( 4 == getComponents() )
    {
        U32* data = (U32*) getData();
        U32 rgbaColor = color.asRGBA();
        for( S32 i = 0; i < pixels; i++ )
        {
            data[ i ] = rgbaColor;
        }
    }
    else
    if( 3 == getComponents() )
    {
        U8* data = getData();
        for( S32 i = 0; i < pixels; i++ )
        {
            data[0] = color.mV[0];
            data[1] = color.mV[1];
            data[2] = color.mV[2];
            data += 3;
        }
    }
}

// Multiplies the first three channels of every pixel by the matching
// channel of 'color', saturating each result to the 0..255 byte range.
// A fourth channel, when present, is left unchanged.
// Does nothing (besides a warning) when the buffer is invalid.
void LLImageRaw::tint( const LLColor3& color )
{
    llassert( (3 == getComponents()) || (4 == getComponents()) );
    if (isBufferInvalid())
    {
        LL_WARNS() << "Invalid image buffer" << LL_ENDL;
        return;
    }

    S32 pixels = getWidth() * getHeight();
    const S32 components = getComponents();
    U8* data = getData();
    for( S32 i = 0; i < pixels; i++ )
    {
        const float c0 = data[0] * color.mV[0];
        const float c1 = data[1] * color.mV[1];
        const float c2 = data[2] * color.mV[2];
        // Clamp while still in floating point, THEN narrow. Casting to U8
        // first makes the clamp a no-op and leaves out-of-range products
        // (tint factor > 1 or < 0) to an undefined float-to-integer conversion.
        data[0] = (U8)llclamp(c0, 0.f, 255.f);
        data[1] = (U8)llclamp(c1, 0.f, 255.f);
        data[2] = (U8)llclamp(c2, 0.f, 255.f);
        data += components;
    }
}

// Returns an image the caller can treat as its own: this very object when
// fewer than two references to it exist, otherwise a newly constructed
// LLImageRaw built from this image's buffer and dimensions.
LLPointer<LLImageRaw> LLImageRaw::duplicate()
{
    if(getNumRefs() >= 2)
    {
        LLImageDataSharedLock guard(this);

        // Somebody else references this image too, so build a separate one.
        LLPointer<LLImageRaw> dup = new LLImageRaw(getData(), getWidth(), getHeight(), getComponents());
        return dup;
    }

    // Nobody else references this image, no need to duplicate.
    return this;
}

// Copies src into this image. The two images may differ in size and may each
// have 3 or 4 components; the matching scaled/unscaled and component
// conversion helper is selected here.
void LLImageRaw::copy(const LLImageRaw* src)
{
    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataSharedLock lockIn(src);
    LLImageDataLock lockOut(this);

    if (!validateSrcAndDst("LLImageRaw::copy", src, dst))
    {
        return;
    }

    const bool same_size = (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight());
    const bool same_depth = (src->getComponents() == dst->getComponents());
    const bool src_is_rgb = (3 == src->getComponents());

    if( same_size )
    {
        // No scaling needed
        if( same_depth )
        {
            copyUnscaled( src );
        }
        else if( src_is_rgb )
        {
            copyUnscaled3onto4( src );
        }
        else
        {
            // 4 == src->getComponents()
            copyUnscaled4onto3( src );
        }
        return;
    }

    // Scaling needed
    if( same_depth )
    {
        copyScaled( src );
    }
    else if( src_is_rgb )
    {
        copyScaled3onto4( src );
    }
    else
    {
        // 4 == src->getComponents()
        copyScaled4onto3( src );
    }
}

// Straight byte copy from src. Both images are expected to share size and
// component count (1, 3 or 4); this is only asserted, not enforced.
void LLImageRaw::copyUnscaled(const LLImageRaw* src)
{
    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataLock guard(this);

    llassert( (1 == src->getComponents()) || (3 == src->getComponents()) || (4 == src->getComponents()) );
    llassert( src->getComponents() == dst->getComponents() );
    llassert( (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight()) );

    // Byte count is taken from this (destination) image.
    memcpy( dst->getData(),     /* Flawfinder: ignore */
            src->getData(),
            getWidth() * getHeight() * getComponents() );
}


// Src and dst can be any size.  Src has 3 components.  Dst has 4 components.
void LLImageRaw::copyScaled3onto4(const LLImageRaw* src)
{
    llassert( (3 == src->getComponents()) && (4 == getComponents()) );

    // Slow, but simple.  Optimize later if needed.
    LLImageRaw temp( src->getWidth(), src->getHeight(), 4);
    temp.copyUnscaled3onto4( src );
    copyScaled( &temp );
}


// Src and dst can be any size.  Src has 4 components.  Dst has 3 components.
void LLImageRaw::copyScaled4onto3(const LLImageRaw* src)
{
    llassert( (4 == src->getComponents()) && (3 == getComponents()) );

    // Slow, but simple.  Optimize later if needed.
    LLImageRaw temp( src->getWidth(), src->getHeight(), 3);
    temp.copyUnscaled4onto3( src );
    copyScaled( &temp );
}


// Src and dst are same size.  Src has 4 components.  Dst has 3 components.
void LLImageRaw::copyUnscaled4onto3( const LLImageRaw* src )
{
    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataLock lock(this);

    llassert( (3 == dst->getComponents()) && (4 == src->getComponents()) );
    llassert( (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight()) );

    S32 pixels = getWidth() * getHeight();
    const U8* src_data = src->getData();
    U8* dst_data = dst->getData();
    for( S32 i=0; i<pixels; i++ )
    {
        dst_data[0] = src_data[0];
        dst_data[1] = src_data[1];
        dst_data[2] = src_data[2];
        src_data += 4;
        dst_data += 3;
    }
}


// Src and dst are same size.  Src has 3 components.  Dst has 4 components.
void LLImageRaw::copyUnscaled3onto4( const LLImageRaw* src )
{
    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataLock lock(this);

    llassert( 3 == src->getComponents() );
    llassert( 4 == dst->getComponents() );
    llassert( (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight()) );

    S32 pixels = getWidth() * getHeight();
    const U8* src_data = src->getData();
    U8* dst_data = dst->getData();
    for( S32 i=0; i<pixels; i++ )
    {
        dst_data[0] = src_data[0];
        dst_data[1] = src_data[1];
        dst_data[2] = src_data[2];
        dst_data[3] = 255;
        src_data += 3;
        dst_data += 4;
    }
}


// Copies src into this image, resampling with bilinear_scale() when the
// sizes differ. Both images must have the same component count (1, 3 or 4);
// this is enforced with llassert_always.
void LLImageRaw::copyScaled( const LLImageRaw* src )
{
    LLImageRaw* dst = this;  // Just for clarity.

    LLImageDataSharedLock lockIn(src);
    LLImageDataLock lockOut(this);

    if (!validateSrcAndDst("LLImageRaw::copyScaled", src, dst))
    {
        return;
    }

    llassert_always( (1 == src->getComponents()) || (3 == src->getComponents()) || (4 == src->getComponents()) );
    llassert_always( src->getComponents() == dst->getComponents() );

    const bool same_size = (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight());
    if( same_size )
    {
        // Identical geometry: a flat copy is enough.
        memcpy( dst->getData(), src->getData(), getWidth() * getHeight() * getComponents() );   /* Flawfinder: ignore */
        return;
    }

    // Row strides are tightly packed: width * components bytes.
    bilinear_scale(
        src->getData(), src->getWidth(), src->getHeight(), src->getComponents(), src->getWidth()*src->getComponents(),
        dst->getData(), dst->getWidth(), dst->getHeight(), dst->getComponents(), dst->getWidth()*dst->getComponents()
    );

    // Previous two-pass implementation, kept for reference:
    /*
    S32 temp_data_size = src->getWidth() * dst->getHeight() * getComponents();
    llassert_always(temp_data_size > 0);
    std::vector<U8> temp_buffer(temp_data_size);

    // Vertical
    for( S32 col = 0; col < src->getWidth(); col++ )
    {
        copyLineScaled( src->getData() + (getComponents() * col), &temp_buffer[0] + (getComponents() * col), src->getHeight(), dst->getHeight(), src->getWidth(), src->getWidth() );
    }

    // Horizontal
    for( S32 row = 0; row < dst->getHeight(); row++ )
    {
        copyLineScaled( &temp_buffer[0] + (getComponents() * src->getWidth() * row), dst->getData() + (getComponents() * dst->getWidth() * row), src->getWidth(), dst->getWidth(), 1, 1 );
    }
    */
}


// Changes the image dimensions to new_width x new_height in place.
//   scale_image_data == true : the pixels are resampled with bilinear_scale()
//                              into a newly allocated buffer.
//   scale_image_data == false: the pixels keep their position; the image is
//                              cropped and/or padded with zero bytes.
// Returns false for an unsupported component count (anything but 1, 3, 4),
// an invalid buffer, or a failed allocation; true otherwise.
bool LLImageRaw::scale( S32 new_width, S32 new_height, bool scale_image_data )
{
    LLImageDataLock lock(this);

    S32 components = getComponents();
    if (components != 1 && components != 3 && components != 4)
    {
        LL_WARNS() << "Invalid getComponents value (" << components << ")" << LL_ENDL;
        return false;
    }

    if (isBufferInvalid())
    {
        LL_WARNS() << "Invalid image buffer" << LL_ENDL;
        return false;
    }

    S32 old_width = getWidth();
    S32 old_height = getHeight();

    if( (old_width == new_width) && (old_height == new_height) )
    {
        return true;  // Nothing to do.
    }

    // Reallocate the data buffer.

    if (scale_image_data)
    {
        S32 new_data_size = new_width * new_height * components;

        // NOTE: when the requested size is not positive the image is left
        // unchanged and the function still reports success below.
        if (new_data_size > 0)
        {
            U8 *new_data = (U8*)ll_aligned_malloc_16(new_data_size);
            if(NULL == new_data)
            {
                return false;
            }

            // Resample from the current buffer before it is replaced.
            bilinear_scale(getData(), old_width, old_height, components, old_width*components, new_data, new_width, new_height, components, new_width*components);
            setDataAndSize(new_data, new_width, new_height, components);
        }
    }
    else try
    {
        // copy out existing image data
        // (must happen before allocateDataSize(), which replaces the buffer)
        S32 temp_data_size = old_width * old_height * components;
        std::vector<U8> temp_buffer(temp_data_size);
        memcpy(&temp_buffer[0], getData(), temp_data_size);

        // allocate new image data, will delete old data
        U8* new_buffer = allocateDataSize(new_width, new_height, components);

        if (!new_buffer)
        {
            LL_WARNS() << "Failed to allocate new image data buffer" << LL_ENDL;
            return false;
        }

        for( S32 row = 0; row < new_height; row++ )
        {
            if (row < old_height)
            {
                // Copy the overlapping part of the row (the narrower of the two widths).
                memcpy(new_buffer + (new_width * row * components), &temp_buffer[0] + (old_width *  row * components),  components * llmin(old_width, new_width));
                if (old_width < new_width)
                {
                    // pad out rest of row with black
                    memset(new_buffer + (components * ((new_width * row) +  old_width)), 0, components * (new_width - old_width));
                }
            }
            else
            {
                // pad remaining rows with black
                memset(new_buffer + (new_width * row * components), 0,  new_width * components);
            }
        }
    }
    catch (std::bad_alloc&) // for temp_buffer
    {
        LL_WARNS() << "Failed to allocate temporary image buffer" << LL_ENDL;
        return false;
    }

    return true ;
}

// Produces a new image of new_width x new_height from this one without
// modifying it: a plain copy when the size is unchanged, a bilinear resample
// otherwise.
// The returned pointer is left unassigned for an unsupported component count,
// an invalid buffer, or a non-positive target size; when allocating the
// result fails, the (unusable) result object is returned as-is.
LLPointer<LLImageRaw> LLImageRaw::scaled(S32 new_width, S32 new_height)
{
    LLPointer<LLImageRaw> result;

    LLImageDataLock guard(this);

    S32 components = getComponents();
    const bool supported = (1 == components) || (3 == components) || (4 == components);
    if (!supported)
    {
        LL_WARNS() << "Invalid getComponents value (" << components << ")" << LL_ENDL;
        return result;
    }

    if (isBufferInvalid())
    {
        LL_WARNS() << "Invalid image buffer" << LL_ENDL;
        return result;
    }

    S32 old_width = getWidth();
    S32 old_height = getHeight();

    if ((old_width != new_width) || (old_height != new_height))
    {
        // Size changes: resample into a new image.
        S32 new_data_size = new_width * new_height * components;
        if (new_data_size <= 0)
        {
            return result;
        }

        result = new LLImageRaw(new_width, new_height, components);
        if (!result || result->isBufferInvalid())
        {
            LL_WARNS() << "Failed to allocate new image" << LL_ENDL;
            return result;
        }
        bilinear_scale(getData(), old_width, old_height, components, old_width*components, result->getData(), new_width, new_height, components, new_width*components);
        return result;
    }

    // Same size: byte-for-byte copy.
    result = new LLImageRaw(old_width, old_height, components);
    if (!result || result->isBufferInvalid())
    {
        LL_WARNS() << "Failed to allocate new image" << LL_ENDL;
        return result;
    }
    memcpy(result->getData(), getData(), getDataSize());

    return result;
}

// Resamples one line of pixels from 'in' to 'out'.
//   in_pixel_len / out_pixel_len   - number of pixels in the input / output line.
//   in_pixel_step / out_pixel_step - distance between consecutive pixels of a
//                                    line, measured in pixels (multiplied by
//                                    the component count to get bytes).
// The component count is taken from this image. Each output pixel covers the
// input interval [x * ratio, (x + 1) * ratio); when that interval spans
// several input pixels their values are summed, weighted by coverage, and
// normalized.
void LLImageRaw::copyLineScaled( const U8* in, U8* out, S32 in_pixel_len, S32 out_pixel_len, S32 in_pixel_step, S32 out_pixel_step )
{
    const S32 components = getComponents();
    llassert( components >= 1 && components <= 4 );

    const F32 ratio = F32(in_pixel_len) / out_pixel_len; // ratio of old to new
    const F32 norm_factor = 1.f / ratio;

    // Offsets of the 2nd and 3rd channel; they collapse onto channel 0 when
    // the image has fewer components, so the reads below stay inside a pixel.
    S32 goff = components >= 2 ? 1 : 0;
    S32 boff = components >= 3 ? 2 : 0;
    for( S32 x = 0; x < out_pixel_len; x++ )
    {
        // Sample input pixels in range from sample0 to sample1.
        // Avoid floating point accumulation error... don't just add ratio each time.  JC
        const F32 sample0 = x * ratio;
        const F32 sample1 = (x+1) * ratio;
        const S32 index0 = llfloor(sample0);            // left integer (floor)
        const S32 index1 = llfloor(sample1);            // right integer (floor)
        const F32 fract0 = 1.f - (sample0 - F32(index0));   // spill over on left
        const F32 fract1 = sample1 - F32(index1);           // spill-over on right

        if( index0 == index1 )
        {
            // Interval is embedded in one input pixel
            // so that pixel is copied verbatim, component by component.
            S32 t0 = x * out_pixel_step * components;
            S32 t1 = index0 * in_pixel_step * components;
            U8* outp = out + t0;
            const U8* inp = in + t1;
            for (S32 i = 0; i < components; ++i)
            {
                *outp = *inp;
                ++outp;
                ++inp;
            }
        }
        else
        {
            // Left straddle
            // (first input pixel, weighted by the part of it that is covered)
            S32 t1 = index0 * in_pixel_step * components;
            F32 r = in[t1 + 0] * fract0;
            F32 g = in[t1 + goff] * fract0;
            F32 b = in[t1 + boff] * fract0;
            F32 a = 0;
            if( components == 4)
            {
                a = in[t1 + 3] * fract0;
            }

            // Central interval
            // (fully covered input pixels contribute with weight 1)
            if (components < 4)
            {
                for( S32 u = index0 + 1; u < index1; u++ )
                {
                    S32 t2 = u * in_pixel_step * components;
                    r += in[t2 + 0];
                    g += in[t2 + goff];
                    b += in[t2 + boff];
                }
            }
            else
            {
                for( S32 u = index0 + 1; u < index1; u++ )
                {
                    S32 t2 = u * in_pixel_step * components;
                    r += in[t2 + 0];
                    g += in[t2 + 1];
                    b += in[t2 + 2];
                    a += in[t2 + 3];
                }
            }

            // right straddle
            // Watch out for reading off of end of input array.
            if( fract1 && index1 < in_pixel_len )
            {
                S32 t3 = index1 * in_pixel_step * components;
                if (components < 4)
                {
                    U8 in0 = in[t3 + 0];
                    U8 in1 = in[t3 + goff];
                    U8 in2 = in[t3 + boff];
                    r += in0 * fract1;
                    g += in1 * fract1;
                    b += in2 * fract1;
                }
                else
                {
                    U8 in0 = in[t3 + 0];
                    U8 in1 = in[t3 + 1];
                    U8 in2 = in[t3 + 2];
                    U8 in3 = in[t3 + 3];
                    r += in0 * fract1;
                    g += in1 * fract1;
                    b += in2 * fract1;
                    a += in3 * fract1;
                }
            }

            // Turn the weighted sums into averages (sum of weights == ratio).
            r *= norm_factor;
            g *= norm_factor;
            b *= norm_factor;
            a *= norm_factor;  // skip conditional

            // Store only as many channels as the image actually has.
            S32 t4 = x * out_pixel_step * components;
            out[t4 + 0] = U8(ll_round(r));
            if (components >= 2)
                out[t4 + 1] = U8(ll_round(g));
            if (components >= 3)
                out[t4 + 2] = U8(ll_round(b));
            if( components == 4)
                out[t4 + 3] = U8(ll_round(a));
        }
    }
}

void LLImageRaw::compositeRowScaled4onto3( const U8* in, U8* out, S32 in_pixel_len, S32 out_pixel_len )
{
    llassert( getComponents() == 3 );

    const S32 IN_COMPONENTS = 4;
    const S32 OUT_COMPONENTS = 3;

    const F32 ratio = F32(in_pixel_len) / out_pixel_len; // ratio of old to new
    const F32 norm_factor = 1.f / ratio;

    for( S32 x = 0; x < out_pixel_len; x++ )
    {
        // Sample input pixels in range from sample0 to sample1.
        // Avoid floating point accumulation error... don't just add ratio each time.  JC
        const F32 sample0 = x * ratio;
        const F32 sample1 = (x+1) * ratio;
        const S32 index0 = S32(sample0);            // left integer (floor)
        const S32 index1 = S32(sample1);            // right integer (floor)
        const F32 fract0 = 1.f - (sample0 - F32(index0));   // spill over on left
        const F32 fract1 = sample1 - F32(index1);           // spill-over on right

        U8 in_scaled_r;
        U8 in_scaled_g;
        U8 in_scaled_b;
        U8 in_scaled_a;

        if( index0 == index1 )
        {
            // Interval is embedded in one input pixel
            S32 t1 = index0 * IN_COMPONENTS;
            in_scaled_r = in[t1 + 0];
            in_scaled_g = in[t1 + 0];
            in_scaled_b = in[t1 + 0];
            in_scaled_a = in[t1 + 0];
        }
        else
        {
            // Left straddle
            S32 t1 = index0 * IN_COMPONENTS;
            F32 r = in[t1 + 0] * fract0;
            F32 g = in[t1 + 1] * fract0;
            F32 b = in[t1 + 2] * fract0;
            F32 a = in[t1 + 3] * fract0;

            // Central interval
            for( S32 u = index0 + 1; u < index1; u++ )
            {
                S32 t2 = u * IN_COMPONENTS;
                r += in[t2 + 0];
                g += in[t2 + 1];
                b += in[t2 + 2];
                a += in[t2 + 3];
            }

            // right straddle
            // Watch out for reading off of end of input array.
            if( fract1 && index1 < in_pixel_len )
            {
                S32 t3 = index1 * IN_COMPONENTS;
                r += in[t3 + 0] * fract1;
                g += in[t3 + 1] * fract1;
                b += in[t3 + 2] * fract1;
                a += in[t3 + 3] * fract1;
            }

            r *= norm_factor;
            g *= norm_factor;
            b *= norm_factor;
            a *= norm_factor;

            in_scaled_r = U8(ll_round(r));
            in_scaled_g = U8(ll_round(g));
            in_scaled_b = U8(ll_round(b));
            in_scaled_a = U8(ll_round(a));
        }

        if( in_scaled_a )
        {
            if( 255 == in_scaled_a )
            {
                out[0] = in_scaled_r;
                out[1] = in_scaled_g;
                out[2] = in_scaled_b;
            }
            else
            {
                U8 transparency = 255 - in_scaled_a;
                out[0] = fastFractionalMult( out[0], transparency ) + fastFractionalMult( in_scaled_r, in_scaled_a );
                out[1] = fastFractionalMult( out[1], transparency ) + fastFractionalMult( in_scaled_g, in_scaled_a );
                out[2] = fastFractionalMult( out[2], transparency ) + fastFractionalMult( in_scaled_b, in_scaled_a );
            }
        }
        out += OUT_COMPONENTS;
    }
}

// Adds the color channels of src onto this image, picking the scaled or
// unscaled helper depending on whether the two sizes match.
// Nothing is done unless this image has 3 components.
void LLImageRaw::addEmissive(LLImageRaw* src)
{
    LLImageRaw* dst = this;  // Just for clarity.

    if (!validateSrcAndDst(__FUNCTION__, src, dst))
    {
        return;
    }

    llassert((3 == src->getComponents()) || (4 == src->getComponents()));
    llassert(3 == dst->getComponents());

    if( 3 != dst->getComponents() )
    {
        return;
    }

    const bool same_size = (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight());
    if( same_size )
    {
        addEmissiveUnscaled(src);
    }
    else
    {
        addEmissiveScaled(src);
    }
}

// Per-pixel saturating add of src's first three channels onto this image's
// first three channels.  Both images are expected to have identical
// dimensions; each may have 3 or 4 components.
void LLImageRaw::addEmissiveUnscaled(LLImageRaw* src)
{
    LLImageRaw* dst = this;  // Alias kept so src/dst read symmetrically.

    llassert((3 == src->getComponents()) || (4 == src->getComponents()));
    llassert((3 == dst->getComponents()) || (4 == dst->getComponents()));
    llassert( (src->getWidth() == dst->getWidth()) && (src->getHeight() == dst->getHeight()) );

    const S32 src_step = src->getComponents();
    const S32 dst_step = dst->getComponents();
    const S32 src_row_len = src_step * src->getWidth();
    const S32 dst_row_len = dst_step * dst->getWidth();
    const S32 columns = dst->getWidth();
    const S32 rows = dst->getHeight();

    U8* const src_base = src->getData();
    U8* const dst_base = dst->getData();

    for (S32 row = 0; row < rows; ++row)
    {
        U8* from = src_base + (src_row_len * row);
        U8* to = dst_base + (dst_row_len * row);
        for (S32 col = 0; col < columns; ++col)
        {
            // Only R, G and B take part; any fourth channel is left alone.
            for (S32 channel = 0; channel < 3; ++channel)
            {
                to[channel] = llmin(255, to[channel] + from[channel]);
            }
            from += src_step;
            to += dst_step;
        }
    }
}

// Emissive add for a source whose dimensions differ from this image's:
// src is first copied (via copyScaled) into a scratch image shaped like
// this one, and that scratch image is then added unscaled.
void LLImageRaw::addEmissiveScaled(LLImageRaw* src)
{
    llassert( (4 == src->getComponents()) && (3 == getComponents()) );

    // Scratch image with this image's width, height and component count.
    LLImageRaw resized_src(getWidth(), getHeight(), getComponents());
    llassert_always(resized_src.getDataSize() > 0);
    resized_src.copyScaled(src);

    addEmissiveUnscaled(&resized_src);
}

bool LLImageRaw::validateSrcAndDst(std::string func, const LLImageRaw* src, const LLImageRaw* dst)
{
    LLImageDataSharedLock lockIn(src);
    LLImageDataLock lockOut(dst);

    if (!src || !dst || src->isBufferInvalid() || dst->isBufferInvalid())
    {
        LL_WARNS() << func << ": Source: ";
        if (!src) LL_CONT << "Null pointer";
        else if (src->isBufferInvalid()) LL_CONT << "Invalid buffer";
        else LL_CONT << "OK";

        LL_CONT << "; Destination: ";
        if (!dst) LL_CONT << "Null pointer";
        else if (dst->isBufferInvalid()) LL_CONT << "Invalid buffer";
        else LL_CONT << "OK";
        LL_CONT << "." << LL_ENDL;

        return false;
    }
    return true;
}

//----------------------------------------------------------------------------

// Table pairing a file extension (lower case, no leading dot) with the codec
// used for it.  Several extensions may share one codec.  Lookups in
// getCodecFromExtension() compare against these strings exactly.
static struct
{
    const char* exten;  // extension text, without the dot
    EImageCodec codec;  // codec associated with that extension
}
file_extensions[] =
{
    { "bmp", IMG_CODEC_BMP },
    { "tga", IMG_CODEC_TGA },
    { "j2c", IMG_CODEC_J2C },
    { "jp2", IMG_CODEC_J2C },
    { "texture", IMG_CODEC_J2C },
    { "jpg", IMG_CODEC_JPEG },
    { "jpeg", IMG_CODEC_JPEG },
    { "mip", IMG_CODEC_DXT },
    { "dxt", IMG_CODEC_DXT },
    { "png", IMG_CODEC_PNG }
};
// Number of entries in file_extensions.
#define NUM_FILE_EXTENSIONS LL_ARRAY_SIZE(file_extensions)
// Disabled helper (excluded from the build by #if 0).
// Tries name + "." + each extension in file_extensions and returns the first
// extension whose file can be opened, storing the matching codec in *codec
// when codec is non-null.  Returns an empty string when no candidate opens.
#if 0
static std::string find_file(std::string &name, S8 *codec)
{
    std::string tname;
    for (int i=0; i<(int)(NUM_FILE_EXTENSIONS); i++)
    {
        tname = name + "." + std::string(file_extensions[i].exten);
        llifstream ifs(tname.c_str(), llifstream::binary);
        if (ifs.is_open())
        {
            ifs.close();
            if (codec)
                *codec = file_extensions[i].codec;
            return std::string(file_extensions[i].exten);
        }
    }
    return std::string("");
}
#endif
// Looks exten up in file_extensions using an exact (case-sensitive) match.
// Returns the associated codec, or IMG_CODEC_INVALID for an empty or
// unrecognised extension.
EImageCodec LLImageBase::getCodecFromExtension(const std::string& exten)
{
    if (exten.empty())
    {
        return IMG_CODEC_INVALID;
    }

    for (const auto& entry : file_extensions)
    {
        if (exten == entry.exten)
        {
            return entry.codec;
        }
    }
    return IMG_CODEC_INVALID;
}
// Disabled loader (excluded from the build by #if 0).
// Picks a codec from the filename's extension (or probes known extensions
// through find_file() when the name has none), reads the file into a
// formatted image of that codec and decodes it into this raw image.
// With j2c_lowest_mip_only set, at most 2048 bytes are read and, for J2C,
// the discard level is raised until width or height reaches 1 (bounded by
// MAX_DISCARD_LEVEL).  Returns false on any failure.
#if 0
bool LLImageRaw::createFromFile(const std::string &filename, bool j2c_lowest_mip_only)
{
    std::string name = filename;
    size_t dotidx = name.rfind('.');
    S8 codec = IMG_CODEC_INVALID;
    std::string exten;

    deleteData(); // delete any existing data

    if (dotidx != std::string::npos)
    {
        exten = name.substr(dotidx+1);
        LLStringUtil::toLower(exten);
        codec = getCodecFromExtension(exten);
    }
    else
    {
        exten = find_file(name, &codec);
        name = name + "." + exten;
    }
    if (codec == IMG_CODEC_INVALID)
    {
        return false; // format not recognized
    }

    llifstream ifs(name.c_str(), llifstream::binary);
    if (!ifs.is_open())
    {
        // SJB: changed from LL_INFOS() to LL_DEBUGS() to reduce spam
        LL_DEBUGS() << "Unable to open image file: " << name << LL_ENDL;
        return false;
    }

    ifs.seekg (0, std::ios::end);
    int length = ifs.tellg();
    if (j2c_lowest_mip_only && length > 2048)
    {
        length = 2048;
    }
    ifs.seekg (0, std::ios::beg);

    if (!length)
    {
        LL_INFOS() << "Zero length file file: " << name << LL_ENDL;
        return false;
    }

    LLPointer<LLImageFormatted> image = LLImageFormatted::createFromType(codec);
    llassert(image.notNull());

    U8 *buffer = image->allocateData(length);
    ifs.read ((char*)buffer, length);
    ifs.close();

    bool success;

    success = image->updateData();
    if (success)
    {
        if (j2c_lowest_mip_only && codec == IMG_CODEC_J2C)
        {
            S32 width = image->getWidth();
            S32 height = image->getHeight();
            S32 discard_level = 0;
            while (width > 1 && height > 1 && discard_level < MAX_DISCARD_LEVEL)
            {
                width >>= 1;
                height >>= 1;
                discard_level++;
            }
            ((LLImageJ2C *)((LLImageFormatted*)image))->setDiscardLevel(discard_level);
        }
        success = image->decode(this, 100000.0f);
    }

    image = NULL; // deletes image
    if (!success)
    {
        deleteData();
        LL_WARNS() << "Unable to decode image" << name << LL_ENDL;
        return false;
    }

    return true;
}
#endif
//---------------------------------------------------------------------------
// LLImageFormatted
//---------------------------------------------------------------------------

//static
// Running byte total of the data buffers held by LLImageFormatted objects.
// Raised in allocateData(), reallocateData() and setData(); lowered in
// reallocateData() and deleteData().
S32 LLImageFormatted::sGlobalFormattedMemory = 0;

// Constructs a formatted image for the given codec with no data attached.
// mDecoding and mDecoded start at 0, mLevels at 0, and mDiscardLevel at -1.
LLImageFormatted::LLImageFormatted(S8 codec)
    : LLImageBase(),
      mCodec(codec),
      mDecoding(0),
      mDecoded(0),
      mDiscardLevel(-1),
      mLevels(0)
{
}

// virtual
LLImageFormatted::~LLImageFormatted()
{
    // Release the buffer here, while this object is still an
    // LLImageFormatted, so that LLImageFormatted::deleteData() runs and
    // sGlobalFormattedMemory is adjusted.
    // NOTE: a deleteData() call made from ~LLImageBase() resolves to
    //        LLImageBase::deleteData(), NOT LLImageFormatted::deleteData().
    deleteData();
}

//----------------------------------------------------------------------------

//virtual
// Clears the last-error text kept by LLImage.
void LLImageFormatted::resetLastError()
{
    const std::string no_error;
    LLImage::setLastError(no_error);
}

//virtual
// Records message as the last error through LLImage::setLastError().  When
// filename is non-empty, " FILE: " followed by the filename is appended.
void LLImageFormatted::setLastError(const std::string& message, const std::string& filename)
{
    if (filename.empty())
    {
        LLImage::setLastError(message);
        return;
    }

    std::string with_file(message);
    with_file.append(" FILE: ").append(filename);
    LLImage::setLastError(with_file);
}

//----------------------------------------------------------------------------

// static
// Allocates a new formatted-image object of the concrete class that matches
// codec.  Returns NULL for any codec without a case below.  The object is
// created with a plain `new`.
LLImageFormatted* LLImageFormatted::createFromType(S8 codec)
{
    switch (codec)
    {
        case IMG_CODEC_BMP:  return new LLImageBMP();
        case IMG_CODEC_TGA:  return new LLImageTGA();
        case IMG_CODEC_JPEG: return new LLImageJPEG();
        case IMG_CODEC_PNG:  return new LLImagePNG();
        case IMG_CODEC_J2C:  return new LLImageJ2C();
        case IMG_CODEC_DXT:  return new LLImageDXT();
        default:             break;
    }
    return NULL;
}

// static
// Translates a MIME type string into a codec id using an exact match.
// Returns IMG_CODEC_INVALID when the MIME type is not one of those listed.
S8 LLImageFormatted::getCodecFromMimeType(std::string_view mimetype)
{
    struct MimeCodec
    {
        std::string_view mMimeType;
        EImageCodec      mCodec;
    };
    static const MimeCodec known_types[] =
    {
        { "image/bmp",  IMG_CODEC_BMP  },
        { "image/tga",  IMG_CODEC_TGA  },
        { "image/jpeg", IMG_CODEC_JPEG },
        { "image/png",  IMG_CODEC_PNG  },
        { "image/j2c",  IMG_CODEC_J2C  },
        { "image/dxt",  IMG_CODEC_DXT  },
    };

    for (const MimeCodec& entry : known_types)
    {
        if (mimetype == entry.mMimeType)
        {
            return entry.mCodec;
        }
    }
    return IMG_CODEC_INVALID;
}

// static
// Creates the formatted-image object for a MIME type by resolving it to a
// codec and passing that to createFromType(); NULL when nothing matches.
LLImageFormatted* LLImageFormatted::createFromMimeType(std::string_view mimetype)
{
    return createFromType(getCodecFromMimeType(mimetype));
}

// static
// Builds a formatted image of the type named by mimetype from an in-memory
// copy of the encoded bytes.
//   data_in  - encoded image bytes; copied, the caller keeps ownership
//   size     - number of bytes at data_in
//   mimetype - MIME type used to choose the concrete image class
// Returns a newly allocated image, or NULL when data_in is null, the MIME
// type is not recognised, the buffer cannot be allocated, or updateData()
// rejects the data.
LLImageFormatted* LLImageFormatted::loadFromMemory(const U8* data_in, U32 size, std::string_view mimetype)
{
    if (!data_in)
    {
        // Nothing to copy from; memcpy from a null pointer is not allowed.
        return NULL;
    }

    LLImageFormatted* image = createFromMimeType(mimetype);
    if (!image)
    {
        return NULL;
    }

    // allocateData() can come back null (load() treats that as an allocation
    // failure), so check before writing through the pointer.
    U8* data = image->allocateData(size);
    if (!data)
    {
        delete image;
        return NULL;
    }
    memcpy(data, data_in, size);

    if (!image->updateData())
    {
        delete image;
        return NULL;
    }
    return image;
}

// static
// Creates the formatted-image object matching a file extension.  instring
// may be a bare extension or a name containing one: everything after the
// last '.' is used, or the whole string when there is no '.'.  Returns NULL
// when the extension is not recognised.
LLImageFormatted* LLImageFormatted::createFromExtension(const std::string& instring)
{
    const size_t last_dot = instring.rfind('.');
    const std::string exten = (std::string::npos == last_dot)
                                  ? instring
                                  : instring.substr(last_dot + 1);

    const S8 codec = getCodecFromExtension(exten);
    return createFromType(codec);
}
//----------------------------------------------------------------------------

// virtual
// Logs the base-class state followed by this class's decoding flags and
// codec id.
void LLImageFormatted::dump()
{
    LLImageBase::dump();

    // Every field is widened to S32 before streaming, as sanityCheck() does,
    // so that char-sized members print as numbers rather than as characters.
    LL_INFOS() << "LLImageFormatted"
            << " mDecoding " << S32(mDecoding)
            << " mCodec " << S32(mCodec)
            << " mDecoded " << S32(mDecoded)
            << LL_ENDL;
}

//----------------------------------------------------------------------------

// Returns width * height * components for the image at the given discard
// level, where each level halves width and height (never below 1).
//   discard_level - level to evaluate; a negative value selects the image's
//                   current mDiscardLevel.
// If the level is still negative after that substitution (mDiscardLevel is
// initialised to -1), it is treated as level 0, i.e. full resolution.
S32 LLImageFormatted::calcDataSize(S32 discard_level)
{
    if (discard_level < 0)
    {
        discard_level = mDiscardLevel;
    }

    // Shifting by a negative count, or by the full width of the type, is
    // undefined behaviour; keep the shift inside the valid range.  Any
    // non-negative 32-bit dimension shifted by 31 is already 0.
    const S32 MAX_SHIFT = 31;
    discard_level = llmin(llmax(discard_level, 0), MAX_SHIFT);

    S32 w = getWidth() >> discard_level;
    S32 h = getHeight() >> discard_level;
    w = llmax(w, 1);
    h = llmax(h, 1);
    return w * h * getComponents();
}

// Finds the lowest discard level (0 .. MAX_IMAGE_MIP inclusive) whose
// calcDataSize() fits into the given byte budget.  Returns -1 when no
// level in that range fits.
S32 LLImageFormatted::calcDiscardLevelBytes(S32 bytes)
{
    llassert(bytes >= 0);

    for (S32 level = 0; level <= MAX_IMAGE_MIP; ++level)
    {
        const S32 needed = calcDataSize(level); // virtual
        if (needed <= bytes)
        {
            return level;
        }
    }
    return -1;
}


//----------------------------------------------------------------------------

// Subclasses that can handle more than 4 channels should override this function.
// This default implementation asserts that the requested range is exactly
// first_channel == 0 and max_channel == 4, then forwards to decode(); the two
// channel arguments are not otherwise used.
bool LLImageFormatted::decodeChannels(LLImageRaw* raw_image,F32  decode_time, S32 first_channel, S32 max_channel)
{
    llassert( (first_channel == 0) && (max_channel == 4) );
    return decode( raw_image, decode_time );  // Loads first 4 channels by default.
}

//----------------------------------------------------------------------------

// virtual
// Allocates the image buffer through LLImageBase::allocateData() while
// holding the image data lock, then adds the resulting data size to
// sGlobalFormattedMemory.  Returns whatever the base-class call returned.
U8* LLImageFormatted::allocateData(S32 size)
{
    LLImageDataLock lock(this);

    // The previous buffer's bytes are subtracted by deleteData(), which the
    // base-class allocation invokes, so only the new size is added here.
    U8* res = LLImageBase::allocateData(size); // calls deleteData()
    sGlobalFormattedMemory += getDataSize();
    return res;
}

// virtual
// Resizes the image buffer through LLImageBase::reallocateData() while
// holding the image data lock, and moves sGlobalFormattedMemory by the
// change in data size.  Returns whatever the base-class call returned.
U8* LLImageFormatted::reallocateData(S32 size)
{
    LLImageDataLock lock(this);

    const S32 size_before = getDataSize();
    U8* const buffer = LLImageBase::reallocateData(size);
    sGlobalFormattedMemory += getDataSize() - size_before;
    return buffer;
}

// virtual
// Releases the image buffer through LLImageBase::deleteData() while holding
// the image data lock, after subtracting its size from
// sGlobalFormattedMemory.
void LLImageFormatted::deleteData()
{
    LLImageDataLock lock(this);

    // Freeing the buffer while a decode is flagged as in progress is
    // reported as an error.
    if (mDecoding)
    {
        LL_ERRS() << "LLImageFormatted::deleteData() is called during decoding" << LL_ENDL;
    }
    // Read the size before the base call, which is what releases the data.
    sGlobalFormattedMemory -= getDataSize();
    LLImageBase::deleteData();
}

//----------------------------------------------------------------------------

// virtual
// Runs the base-class sanity check, then reports an error if mCodec is not
// below IMG_CODEC_EOF.  The report lists the decoding flags and the codec id.
void LLImageFormatted::sanityCheck()
{
    LLImageBase::sanityCheck();

    if (mCodec >= IMG_CODEC_EOF)
    {
        // Each label carries its own leading space so the fields do not run
        // together in the log line.
        LL_ERRS() << "Failed LLImageFormatted::sanityCheck"
               << " decoding " << S32(mDecoding)
               << " decoded " << S32(mDecoded)
               << " codec " << S32(mCodec)
               << LL_ENDL;
    }
}

//----------------------------------------------------------------------------

// Replaces this image's buffer with a private copy of size bytes from data.
//   data - source bytes; the caller keeps ownership.  Apart from being the
//          image's own buffer start, it must not point into this image's
//          current buffer, because that buffer is released before the copy.
//   size - number of bytes to copy
// Returns true when the image holds the requested bytes afterwards, or when
// data is null (nothing to do); false when the buffer could not be
// allocated.
bool LLImageFormatted::copyData(U8 *data, S32 size)
{
    LLImageDataLock lock(this);

    if (!data)
    {
        return true;
    }

    if (data == getData())
    {
        // Source is our own buffer: deleting it first would leave memcpy
        // reading freed memory.  Resize in place instead; appendData() relies
        // on reallocateData() keeping the existing bytes.
        if (size == getDataSize())
        {
            return true;
        }
        return reallocateData(size) != NULL;
    }

    deleteData();
    U8* buffer = allocateData(size);
    if (!buffer)
    {
        return false;
    }
    memcpy(buffer, data, size);  /* Flawfinder: ignore */
    return true;
}

// Adopts data as this image's buffer: LLImageFormatted becomes the owner of
// data.  Any previous buffer is released first.  Does nothing when data is
// null or is already the current buffer.
void LLImageFormatted::setData(U8 *data, S32 size)
{
    LLImageDataLock lock(this);

    const bool nothing_to_adopt = (NULL == data) || (getData() == data);
    if (nothing_to_adopt)
    {
        return;
    }

    deleteData();
    setDataAndSize(data, size); // Access private LLImageBase members

    // deleteData() removed the old buffer's bytes; account for the new one.
    sGlobalFormattedMemory += getDataSize();
}

// Appends size bytes from data to the end of this image's buffer and takes
// ownership of data.
//   data - bytes to append.  When the image has no buffer yet, data is
//          adopted directly through setData(); otherwise its contents are
//          copied and data is released with ll_aligned_free_16().
//          A null pointer makes the call a no-op.
//   size - number of bytes at data
// If the buffer cannot be grown, the existing contents are left untouched,
// "Allocation failure" is recorded as the last error, and data is still
// released.
void LLImageFormatted::appendData(U8 *data, S32 size)
{
    if (!data)
    {
        return;
    }

    LLImageDataLock lock(this);

    if (!getData())
    {
        setData(data, size);
        return;
    }

    const S32 cursize = getDataSize();
    const S32 newsize = cursize + size;
    if (reallocateData(newsize))
    {
        memcpy(getData() + cursize, data, size);
    }
    else
    {
        // Without this check the copy would write past the old buffer.
        setLastError("Allocation failure", std::string());
    }
    ll_aligned_free_16(data);
}

//----------------------------------------------------------------------------


// Reads an encoded image from disk into this object's buffer and calls
// updateData() on it.
//   filename  - path of the file to read
//   load_size - number of leading bytes to read; zero, a negative value, or
//               a value larger than the file means "the whole file"
// Returns the result of updateData() on success.  Returns false, with the
// last error set, when the file cannot be opened, is empty, cannot be fully
// read, or the buffer cannot be allocated.
bool LLImageFormatted::load(const std::string &filename, int load_size)
{
    resetLastError();

    S32 file_size = 0;
    LLAPRFile infile ;
    infile.open(filename, LL_APR_RB, NULL, &file_size);
    apr_file_t* apr_file = infile.getFileHandle();
    if (!apr_file)
    {
        setLastError("Unable to open file for reading", filename);
        return false;
    }
    if (file_size == 0)
    {
        setLastError("File is empty",filename);
        return false;
    }

    // Constrain the load size to acceptable values.  Negative requests are
    // folded into the "whole file" case so they never reach allocateData().
    if ((load_size <= 0) || (load_size > file_size))
    {
        load_size = file_size;
    }

    LLImageDataLock lock(this);

    U8 *data = allocateData(load_size);
    if (!data)
    {
        setLastError("Allocation failure", filename);
        return false;
    }

    apr_size_t bytes_read = load_size;
    apr_status_t s = apr_file_read(apr_file, data, &bytes_read); // modifies bytes_read
    if (s != APR_SUCCESS || (S32) bytes_read != load_size)
    {
        // A short read leaves the buffer unusable; drop it.
        deleteData();
        setLastError("Unable to read file",filename);
        return false;
    }

    return updateData();
}

bool LLImageFormatted::save(const std::string &filename)
{
    resetLastError();

    LLAPRFile outfile ;
    outfile.open(filename, LL_APR_WB);
    if (!outfile.getFileHandle())
    {
        setLastError("Unable to open file for writing", filename);
        return false;
    }

    LLImageDataSharedLock lock(this);

    S32 result = outfile.write(getData(), getDataSize());
    outfile.close() ;
    return (result != 0);
}

// Returns the codec id this image was constructed with.
S8 LLImageFormatted::getCodec() const
{
    return mCodec;
}

static void avg4_colors4(const U8* a, const U8* b, const U8* c, const U8* d, U8* dst)
{
    dst[0] = (U8)(((U32)(a[0]) + b[0] + c[0] + d[0])>>2);
    dst[1] = (U8)(((U32)(a[1]) + b[1] + c[1] + d[1])>>2);
    dst[2] = (U8)(((U32)(a[2]) + b[2] + c[2] + d[2])>>2);
    dst[3] = (U8)(((U32)(a[3]) + b[3] + c[3] + d[3])>>2);
}

static void avg4_colors3(const U8* a, const U8* b, const U8* c, const U8* d, U8* dst)
{
    dst[0] = (U8)(((U32)(a[0]) + b[0] + c[0] + d[0])>>2);
    dst[1] = (U8)(((U32)(a[1]) + b[1] + c[1] + d[1])>>2);
    dst[2] = (U8)(((U32)(a[2]) + b[2] + c[2] + d[2])>>2);
}

static void avg4_colors2(const U8* a, const U8* b, const U8* c, const U8* d, U8* dst)
{
    dst[0] = (U8)(((U32)(a[0]) + b[0] + c[0] + d[0])>>2);
    dst[1] = (U8)(((U32)(a[1]) + b[1] + c[1] + d[1])>>2);
}

// Installs data and size directly as this image's buffer pointer and byte
// count.  Nothing is copied, and any previously held buffer is not released
// here.
void LLImageBase::setDataAndSize(U8 *data, S32 size)
{
    // presumably asserts that data is 16-byte aligned -- confirm against ll_assert_aligned
    ll_assert_aligned(data, 16);
    mData = data;
    mDataSize = size;
}

//static
void LLImageBase::generateMip(const U8* indata, U8* mipdata, S32 width, S32 height, S32 nchannels)
{
    llassert(width > 0 && height > 0);
    U8* data = mipdata;
    S32 in_width = width*2;
    for (S32 h=0; h<height; h++)
    {
        for (S32 w=0; w<width; w++)
        {
            switch(nchannels)
            {
              case 4:
                avg4_colors4(indata, indata+4, indata+4*in_width, indata+4*in_width+4, data);
                break;
              case 3:
                avg4_colors3(indata, indata+3, indata+3*in_width, indata+3*in_width+3, data);
                break;
              case 2:
                avg4_colors2(indata, indata+2, indata+2*in_width, indata+2*in_width+2, data);
                break;
              case 1:
                *(U8*)data = (U8)(((U32)(indata[0]) + indata[1] + indata[in_width] + indata[in_width+1])>>2);
                break;
              default:
                LL_ERRS() << "generateMmip called with bad num channels" << LL_ENDL;
            }
            indata += nchannels*2;
            data += nchannels;
        }
        indata += nchannels*in_width; // skip odd lines
    }
}


//============================================================================

//static
// Computes a download priority from an image's on-screen size and the amount
// of data already sent.
//   virtual_size   - size measure, divided by 10*10 before use
//   visible_pixels - visible pixel count; only scales positive priorities
//   bytes_sent     - bytes already delivered; more bytes give a lower weight
// The result is log10(weight^2 * virtual_size / 100); when that is positive
// it is multiplied by 3 * log10(visible_pixels + 1).
F32 LLImageBase::calc_download_priority(F32 virtual_size, F32 visible_pixels, S32 bytes_sent)
{
    // Weight applied while bytes_sent is below each limit, checked in order.
    struct SentStep
    {
        S32 mBelow;
        F32 mWeight;
    };
    static const SentStep sent_steps[] =
    {
        {  1000, 1.f       },
        {  2000, 1.f/1.5f  },
        {  4000, 1.f/3.f   },
        {  8000, 1.f/6.f   },
        { 16000, 1.f/12.f  },
        { 32000, 1.f/20.f  },
        { 64000, 1.f/32.f  },
    };

    F32 bytes_weight = 1.f/64.f;    // used at or beyond the last limit
    if (!bytes_sent)
    {
        // Nothing sent yet: weight this request heavily.
        bytes_weight = 20.f;
    }
    else
    {
        for (const SentStep& step : sent_steps)
        {
            if (bytes_sent < step.mBelow)
            {
                bytes_weight = step.mWeight;
                break;
            }
        }
    }
    bytes_weight *= bytes_weight;

    const F32 virtual_size_factor = virtual_size / (10.f*10.f);

    // The weighted priority is meant to drop to <= 0 once enough data has
    // been sent.
    F32 w_priority = (F32)log10(bytes_weight * virtual_size_factor);

    // Visible pixels should influence the ORDER in which data is sent, not
    // how much is sent, so they are applied as a multiplier that leaves the
    // zero point where it is.
    if (w_priority > 0.f)
    {
        const F32 pixel_weight = (F32)log10(visible_pixels + 1)*3.0f;
        w_priority *= pixel_weight;
    }

    return w_priority;
}

//============================================================================