diff options
Diffstat (limited to 'indra/llimage')
25 files changed, 582 insertions, 15 deletions
// Patch metadata that shared this collapsed line (kept verbatim, not C++):
//   diff --git a/indra/llimage/CMakeLists.txt b/indra/llimage/CMakeLists.txt
//   index 293ada7548..293ada7548 100755..100644
//   --- a/indra/llimage/CMakeLists.txt
//   +++ b/indra/llimage/CMakeLists.txt
//   diff --git a/indra/llimage/llimage.cpp b/indra/llimage/llimage.cpp
//   index 16df27bb8e..08462c7834 100755..100644
//   --- a/indra/llimage/llimage.cpp
//   +++ b/indra/llimage/llimage.cpp
//   @@ -40,6 +40,545 @@
#include "llimagedxt.h"
#include "llmemory.h"

#include <boost/preprocessor.hpp>

//..................................................................................
//..................................................................................
// Helper macros that generate compile-time unrolled per-channel loops.
//
// An "argument sequence" is a Boost.PP sequence that alternates type and name,
// e.g. (S32 *)(cx)(S32 *)(comp): even positions are types, odd positions are
// parameter names.
//..................................................................................

// Element filter used when building a call argument list: drops even-indexed
// elements (types) and keeps odd-indexed ones (names).
#define _UNROL_GEN_TPL_arg_0(arg)
#define _UNROL_GEN_TPL_arg_1(arg) arg

// Conditional comma: expands to nothing for 0 and to a literal comma for 1.
#define _UNROL_GEN_TPL_comma_0
#define _UNROL_GEN_TPL_comma_1 BOOST_PP_COMMA()
//..................................................................................
// Per-element step for _UNROL_GEN_TPL_ARGS: emits element n only when n is odd
// (a name), followed by a comma when n is odd and n is not the last element.
#define _UNROL_GEN_TPL_ARGS_macro(z,n,seq) \
    BOOST_PP_CAT(_UNROL_GEN_TPL_arg_, BOOST_PP_MOD(n, 2))(BOOST_PP_SEQ_ELEM(n, seq)) BOOST_PP_CAT(_UNROL_GEN_TPL_comma_, BOOST_PP_AND(BOOST_PP_MOD(n, 2), BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), BOOST_PP_SEQ_SIZE(seq))))

// (S32 *)(cx)(S32 *)(comp)  ->  cx , comp          (call-site argument list)
#define _UNROL_GEN_TPL_ARGS(seq) \
    BOOST_PP_REPEAT(BOOST_PP_SEQ_SIZE(seq), _UNROL_GEN_TPL_ARGS_macro, seq)
//..................................................................................

// Per-element step for _UNROL_GEN_TPL_TYPE_ARGS: emits every element (type and
// name), with a comma after each odd-indexed element except the last one.
#define _UNROL_GEN_TPL_TYPE_ARGS_macro(z,n,seq) \
    BOOST_PP_SEQ_ELEM(n, seq) BOOST_PP_CAT(_UNROL_GEN_TPL_comma_, BOOST_PP_AND(BOOST_PP_MOD(n, 2), BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), BOOST_PP_SEQ_SIZE(seq))))

// (S32 *)(cx)(S32 *)(comp)  ->  S32 * cx , S32 * comp   (parameter declaration list)
#define _UNROL_GEN_TPL_TYPE_ARGS(seq) \
    BOOST_PP_REPEAT(BOOST_PP_SEQ_SIZE(seq), _UNROL_GEN_TPL_TYPE_ARGS_macro, seq)
//..................................................................................
// One unrolled step: calls executor<n> with the functor's own argument names.
#define _UNROLL_GEN_TPL_foreach_ee(z, n, seq) \
    executor<n>(_UNROL_GEN_TPL_ARGS(seq));

// Generates the explicit specialization name<spec>. Its operator() expands to
// executor<0>(...); ... executor<spec-1>(...); and each executor<_idx> runs
// `operation` once with `_idx` as a compile-time constant channel index.
// `operation` is a one-element Boost.PP sequence wrapping the expression.
#define _UNROLL_GEN_TPL(name, args_seq, operation, spec) \
    template<> struct name<spec> { \
    private: \
        template<S32 _idx> inline void executor(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { \
            BOOST_PP_SEQ_ENUM(operation) ; \
        } \
    public: \
        inline void operator()(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { \
            BOOST_PP_REPEAT(spec, _UNROLL_GEN_TPL_foreach_ee, args_seq) \
        } \
};
//..................................................................................
// BOOST_PP_SEQ_FOR_EACH adapter: unpacks (name)(args_seq)(operation) from `data`
// and generates the specialization for channel count `elem`.
#define _UNROLL_GEN_TPL_foreach_seq_macro(r, data, elem) \
    _UNROLL_GEN_TPL(BOOST_PP_SEQ_ELEM(0, data), BOOST_PP_SEQ_ELEM(1, data), BOOST_PP_SEQ_ELEM(2, data), elem)

// Public entry point. Declares the primary template `name<U8>` whose operator()
// is an empty body (the static_assert that would reject it is commented out, so
// an unsupported channel count compiles to a silent no-op), then generates one
// specialization per channel count listed in `spec_seq`, e.g. (1)(3)(4).
#define UNROLL_GEN_TPL(name, args_seq, operation, spec_seq) \
    /*general specialization - should not be implemented!*/ \
    template<U8> struct name { inline void operator()(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { /*static_assert(!"Should not be instantiated.");*/ } }; \
    BOOST_PP_SEQ_FOR_EACH(_UNROLL_GEN_TPL_foreach_seq_macro, (name)(args_seq)(operation), spec_seq)
//..................................................................................
//..................................................................................


//..................................................................................
// Generated unrolling loop templates with specializations
//..................................................................................
// Each functor below replaces a `for(c = 0; c < ch; ++c)` loop over the colour
// channels; `_idx` is the compile-time channel index supplied by the generator.

// Zeroes both accumulators.
//example: for(c = 0; c < ch; ++c) cx[c] = comp[c] = 0;
UNROLL_GEN_TPL(uroll_zeroze_cx_comp, (S32 *)(cx)(S32 *)(comp), (cx[_idx] = comp[_idx] = 0), (1)(3)(4));
// Shifts every channel of comp right by cval.
//example: for(c = 0; c < ch; ++c) comp[c] >>= 4;
UNROLL_GEN_TPL(uroll_comp_rshftasgn_constval, (S32 *)(comp)(const S32)(cval), (comp[_idx] >>= cval), (1)(3)(4));
// comp = (cx >> cval) * val
//example: for(c = 0; c < ch; ++c) comp[c] = (cx[c] >> 5) * yap;
UNROLL_GEN_TPL(uroll_comp_asgn_cx_rshft_cval_all_mul_val, (S32 *)(comp)(S32 *)(cx)(const S32)(cval)(S32)(val), (comp[_idx] = (cx[_idx] >> cval) * val), (1)(3)(4));
// comp += (cx >> cval) * val
//example: for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * Cy;
UNROLL_GEN_TPL(uroll_comp_plusasgn_cx_rshft_cval_all_mul_val, (S32 *)(comp)(S32 *)(cx)(const S32)(cval)(S32)(val), (comp[_idx] += (cx[_idx] >> cval) * val), (1)(3)(4));
// Accumulates a source pixel scaled by val into the first argument.
//example: for(c = 0; c < ch; ++c) comp[c] += pix[c] * info.xapoints[x];
UNROLL_GEN_TPL(uroll_inp_plusasgn_pix_mul_val, (S32 *)(comp)(const U8 *)(pix)(S32)(val), (comp[_idx] += pix[_idx] * val), (1)(3)(4));
// Stores a source pixel scaled by val into the first argument (callers pass
// either `comp` or `cx` there; the parameter is merely named comp).
//example: for(c = 0; c < ch; ++c) cx[c] = pix[c] * info.xapoints[x];
UNROLL_GEN_TPL(uroll_inp_asgn_pix_mul_val, (S32 *)(comp)(const U8 *)(pix)(S32)(val), (comp[_idx] = pix[_idx] * val), (1)(3)(4));
// Blends cx (weight apoint) with comp (weight 256 - apoint), then drops 16 bits.
//example: for(c = 0; c < ch; ++c) comp[c] = ((cx[c] * info.yapoints[y]) + (comp[c] * (256 - info.yapoints[y]))) >> 16;
UNROLL_GEN_TPL(uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r, (S32 *)(comp)(S32 *)(cx)(S32)(apoint), (comp[_idx] = ((cx[_idx] * apoint) + (comp[_idx] * (256 - apoint))) >> 16), (1)(3)(4));
// Adds a source pixel weighted by apoint to comp, then drops 8 bits.
//example: for(c = 0; c < ch; ++c) comp[c] = (comp[c] + pix[c] * info.yapoints[y]) >> 8;
UNROLL_GEN_TPL(uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r, (S32 *)(comp)(const U8 *)(pix)(S32)(apoint), (comp[_idx] = (comp[_idx] + pix[_idx] * apoint) >> 8), (1)(3)(4));
//example: for(c = 0; c < ch; ++c) comp[c] = ((comp[c]*(256 - info.xapoints[x])) + ((cx[c] * info.xapoints[x]))) >> 12;
+UNROLL_GEN_TPL(uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r, (S32 *)(comp)(S32)(apoint)(S32 *)(cx), (comp[_idx] = ((comp[_idx] * (256-apoint)) + (cx[_idx] * apoint)) >> 12), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) *dptr++ = comp[c]&0xff; +UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_comp_and_ff, (U8 *&)(dptr)(S32 *)(comp), (*dptr++ = comp[_idx]&0xff), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) *dptr++ = (sptr[info.xpoints[x]*ch + c])&0xff; +UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff, (U8 *&)(dptr)(const U8 *)(sptr)(S32)(apoint), (*dptr++ = sptr[apoint + _idx]&0xff), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff; +UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff, (U8 *&)(dptr)(S32 *)(comp)(const S32)(cval), (*dptr++ = (comp[_idx]>>cval)&0xff), (1)(3)(4)); +//.................................................................................. + + +template<U8 ch> +struct scale_info +{ +public: + std::vector<S32> xpoints; + std::vector<const U8*> ystrides; + std::vector<S32> xapoints, yapoints; + S32 xup_yup; + +public: + //unrolling loop types declaration + typedef uroll_zeroze_cx_comp<ch> uroll_zeroze_cx_comp_t; + typedef uroll_comp_rshftasgn_constval<ch> uroll_comp_rshftasgn_constval_t; + typedef uroll_comp_asgn_cx_rshft_cval_all_mul_val<ch> uroll_comp_asgn_cx_rshft_cval_all_mul_val_t; + typedef uroll_comp_plusasgn_cx_rshft_cval_all_mul_val<ch> uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t; + typedef uroll_inp_plusasgn_pix_mul_val<ch> uroll_inp_plusasgn_pix_mul_val_t; + typedef uroll_inp_asgn_pix_mul_val<ch> uroll_inp_asgn_pix_mul_val_t; + typedef uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r<ch> uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r_t; + typedef uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r<ch> uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t; + typedef 
uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r<ch> uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t; + typedef uroll_uref_dptr_inc_asgn_comp_and_ff<ch> uroll_uref_dptr_inc_asgn_comp_and_ff_t; + typedef uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff<ch> uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff_t; + typedef uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff<ch> uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t; + +public: + scale_info(const U8 *src, U32 srcW, U32 srcH, U32 dstW, U32 dstH, U32 srcStride) + : xup_yup((dstW >= srcW) + ((dstH >= srcH) << 1)) + { + calc_x_points(srcW, dstW); + calc_y_strides(src, srcStride, srcH, dstH); + calc_aa_points(srcW, dstW, xup_yup&1, xapoints); + calc_aa_points(srcH, dstH, xup_yup&2, yapoints); + } + +private: + //........................................................................................... + void calc_x_points(U32 srcW, U32 dstW) + { + xpoints.resize(dstW+1); + + S32 val = dstW >= srcW ? 0x8000 * srcW / dstW - 0x8000 : 0; + S32 inc = (srcW << 16) / dstW; + + for(U32 i = 0, j = 0; i < dstW; ++i, ++j, val += inc) + { + xpoints[j] = llmax(0, val >> 16); + } + } + //........................................................................................... + void calc_y_strides(const U8 *src, U32 srcStride, U32 srcH, U32 dstH) + { + ystrides.resize(dstH+1); + + S32 val = dstH >= srcH ? 0x8000 * srcH / dstH - 0x8000 : 0; + S32 inc = (srcH << 16) / dstH; + + for(U32 i = 0, j = 0; i < dstH; ++i, ++j, val += inc) + { + ystrides[j] = src + llmax(0, val >> 16) * srcStride; + } + } + //........................................................................................... 
+ void calc_aa_points(U32 srcSz, U32 dstSz, bool scale_up, std::vector<S32> &vp) + { + vp.resize(dstSz); + + if(scale_up) + { + S32 val = 0x8000 * srcSz / dstSz - 0x8000; + S32 inc = (srcSz << 16) / dstSz; + U32 pos; + + for(U32 i = 0, j = 0; i < dstSz; ++i, ++j, val += inc) + { + pos = val >> 16; + + if (pos >= (srcSz - 1)) + vp[j] = 0; + else + vp[j] = (val >> 8) - ((val >> 8) & 0xffffff00); + } + } + else + { + S32 inc = (srcSz << 16) / dstSz; + S32 Cp = ((dstSz << 14) / srcSz) + 1; + S32 ap; + + for(U32 i = 0, j = 0, val = 0; i < dstSz; ++i, ++j, val += inc) + { + ap = ((0x100 - ((val >> 8) & 0xff)) * Cp) >> 8; + vp[j] = ap | (Cp << 16); + } + } + } +}; + + +template<U8 ch> +inline void bilinear_scale( + const U8 *src, U32 srcW, U32 srcH, U32 srcStride + , U8 *dst, U32 dstW, U32 dstH, U32 dstStride + ) +{ + typedef scale_info<ch> scale_info_t; + + scale_info_t info(src, srcW, srcH, dstW, dstH, srcStride); + + const U8 *sptr; + U8 *dptr; + U32 x, y; + const U8 *pix; + + S32 cx[ch], comp[ch]; + + + if(3 == info.xup_yup) + { //scale x/y - up + for(y = 0; y < dstH; ++y) + { + dptr = dst + (y * dstStride); + sptr = info.ystrides[y]; + + if(0 < info.yapoints[y]) + { + for(x = 0; x < dstW; ++x) + { + //for(c = 0; c < ch; ++c) cx[c] = comp[c] = 0; + typename scale_info_t::uroll_zeroze_cx_comp_t()(cx, comp); + + if(0 < info.xapoints[x]) + { + pix = info.ystrides[y] + info.xpoints[x] * ch; + + //for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.xapoints[x]); + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256 - info.xapoints[x]); + + pix += ch; + + //for(c = 0; c < ch; ++c) comp[c] += pix[c] * info.xapoints[x]; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, info.xapoints[x]); + + pix += srcStride; + + //for(c = 0; c < ch; ++c) cx[c] = pix[c] * info.xapoints[x]; + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, info.xapoints[x]); + + pix -= ch; + + //for(c = 0; c < ch; ++c) { + // cx[c] += pix[c] * (256 - 
info.xapoints[x]); + // comp[c] = ((cx[c] * info.yapoints[y]) + (comp[c] * (256 - info.yapoints[y]))) >> 16; + // *dptr++ = comp[c]&0xff; + //} + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, 256 - info.xapoints[x]); + typename scale_info_t::uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r_t()(comp, cx, info.yapoints[y]); + typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp); + } + else + { + pix = info.ystrides[y] + info.xpoints[x] * ch; + + //for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.yapoints[y]); + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256-info.yapoints[y]); + + pix += srcStride; + + //for(c = 0; c < ch; ++c) { + // comp[c] = (comp[c] + pix[c] * info.yapoints[y]) >> 8; + // *dptr++ = comp[c]&0xff; + //} + typename scale_info_t::uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t()(comp, pix, info.yapoints[y]); + typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp); + } + } + } + else + { + for(x = 0; x < dstW; ++x) + { + if(0 < info.xapoints[x]) + { + pix = info.ystrides[y] + info.xpoints[x] * ch; + + //for(c = 0; c < ch; ++c) { + // comp[c] = pix[c] * (256 - info.xapoints[x]); + // comp[c] = (comp[c] + pix[c] * info.xapoints[x]) >> 8; + // *dptr++ = comp[c]&0xff; + //} + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256 - info.xapoints[x]); + typename scale_info_t::uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t()(comp, pix, info.xapoints[x]); + typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp); + } + else + { + //for(c = 0; c < ch; ++c) *dptr++ = (sptr[info.xpoints[x]*ch + c])&0xff; + typename scale_info_t::uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff_t()(dptr, sptr, info.xpoints[x]*ch); + } + } + } + } + } + else if(info.xup_yup == 1) + { //scaling down vertically + S32 Cy, j; + S32 yap; + + for(y = 0; y < dstH; y++) + { + Cy = info.yapoints[y] >> 16; + yap = 
info.yapoints[y] & 0xffff; + + dptr = dst + (y * dstStride); + + for(x = 0; x < dstW; x++) + { + pix = info.ystrides[y] + info.xpoints[x] * ch; + + //for(c = 0; c < ch; ++c) comp[c] = pix[c] * yap; + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, yap); + + pix += srcStride; + + for(j = (1 << 14) - yap; j > Cy; j -= Cy, pix += srcStride) + { + //for(c = 0; c < ch; ++c) comp[c] += pix[c] * Cy; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, Cy); + } + + if(j > 0) + { + //for(c = 0; c < ch; ++c) comp[c] += pix[c] * j; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, j); + } + + if(info.xapoints[x] > 0) + { + pix = info.ystrides[y] + info.xpoints[x]*ch + ch; + //for(c = 0; c < ch; ++c) cx[c] = pix[c] * yap; + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, yap); + + pix += srcStride; + for(j = (1 << 14) - yap; j > Cy; j -= Cy) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cy; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cy); + pix += srcStride; + } + + if(j > 0) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * j; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, j); + } + + //for(c = 0; c < ch; ++c) comp[c] = ((comp[c]*(256 - info.xapoints[x])) + ((cx[c] * info.xapoints[x]))) >> 12; + typename scale_info_t::uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t()(comp, info.xapoints[x], cx); + } + else + { + //for(c = 0; c < ch; ++c) comp[c] >>= 4; + typename scale_info_t::uroll_comp_rshftasgn_constval_t()(comp, 4); + } + + //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff; + typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 10); + } + } + } + else if(info.xup_yup == 2) + { // scaling down horizontally + S32 Cx, j; + S32 xap; + + for(y = 0; y < dstH; y++) + { + dptr = dst + (y * dstStride); + + for(x = 0; x < dstW; x++) + { + Cx = info.xapoints[x] >> 16; + xap = info.xapoints[x] & 
0xffff; + + pix = info.ystrides[y] + info.xpoints[x] * ch; + + //for(c = 0; c < ch; ++c) comp[c] = pix[c] * xap; + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, xap); + + pix+=ch; + for(j = (1 << 14) - xap; j > Cx; j -= Cx) + { + //for(c = 0; c < ch; ++c) comp[c] += pix[c] * Cx; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, Cx); + pix+=ch; + } + + if(j > 0) + { + //for(c = 0; c < ch; ++c) comp[c] += pix[c] * j; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, j); + } + + if(info.yapoints[y] > 0) + { + pix = info.ystrides[y] + info.xpoints[x]*ch + srcStride; + //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap; + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap); + + pix+=ch; + for(j = (1 << 14) - xap; j > Cx; j -= Cx) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx); + pix+=ch; + } + + if(j > 0) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * j; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, j); + } + + //for(c = 0; c < ch; ++c) comp[c] = ((comp[c] * (256 - info.yapoints[y])) + ((cx[c] * info.yapoints[y]))) >> 12; + typename scale_info_t::uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t()(comp, info.yapoints[y], cx); + } + else + { + //for(c = 0; c < ch; ++c) comp[c] >>= 4; + typename scale_info_t::uroll_comp_rshftasgn_constval_t()(comp, 4); + } + + //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff; + typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 10); + } + } + } + else + { //scale x/y - down + S32 Cx, Cy, i, j; + S32 xap, yap; + + for(y = 0; y < dstH; y++) + { + Cy = info.yapoints[y] >> 16; + yap = info.yapoints[y] & 0xffff; + + dptr = dst + (y * dstStride); + for(x = 0; x < dstW; x++) + { + Cx = info.xapoints[x] >> 16; + xap = info.xapoints[x] & 0xffff; + + sptr = info.ystrides[y] + info.xpoints[x] * ch; + pix = 
sptr; + sptr += srcStride; + + //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap; + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap); + + pix+=ch; + for(i = (1 << 14) - xap; i > Cx; i -= Cx) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx); + pix+=ch; + } + + if(i > 0) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i); + } + + //for(c = 0; c < ch; ++c) comp[c] = (cx[c] >> 5) * yap; + typename scale_info_t::uroll_comp_asgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, yap); + + for(j = (1 << 14) - yap; j > Cy; j -= Cy) + { + pix = sptr; + sptr += srcStride; + + //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap; + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap); + + pix+=ch; + for(i = (1 << 14) - xap; i > Cx; i -= Cx) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx); + pix+=ch; + } + + if(i > 0) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i); + } + + //for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * Cy; + typename scale_info_t::uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, Cy); + } + + if(j > 0) + { + pix = sptr; + sptr += srcStride; + + //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap; + typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap); + + pix+=ch; + for(i = (1 << 14) - xap; i > Cx; i -= Cx) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx); + pix+=ch; + } + + if(i > 0) + { + //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i; + typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i); + } + + //for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * j; + typename 
scale_info_t::uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, j); + } + + //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>23)&0xff; + typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 23); + } + } + } //else +} + +//wrapper +static void bilinear_scale(const U8 *src, U32 srcW, U32 srcH, U32 srcCh, U32 srcStride, U8 *dst, U32 dstW, U32 dstH, U32 dstCh, U32 dstStride) +{ + llassert(srcCh == dstCh); + + switch(srcCh) + { + case 1: + bilinear_scale<1>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride); + break; + case 3: + bilinear_scale<3>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride); + break; + case 4: + bilinear_scale<4>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride); + break; + default: + llassert(!"Implement if need"); + break; + } + +} + //--------------------------------------------------------------------------- // LLImage //--------------------------------------------------------------------------- @@ -559,6 +1098,7 @@ void LLImageRaw::composite( LLImageRaw* src ) } } + // Src and dst can be any size. Src has 4 components. Dst has 3 components. void LLImageRaw::compositeScaled4onto3(LLImageRaw* src) { @@ -589,21 +1129,6 @@ void LLImageRaw::compositeScaled4onto3(LLImageRaw* src) // Src and dst are same size. Src has 4 components. Dst has 3 components. void LLImageRaw::compositeUnscaled4onto3( LLImageRaw* src ) { - /* - //test fastFractionalMult() - { - U8 i = 255; - U8 j = 255; - do - { - do - { - llassert( fastFractionalMult(i, j) == (U8)(255*(i/255.f)*(j/255.f) + 0.5f) ); - } while( j-- ); - } while( i-- ); - } - */ - LLImageRaw* dst = this; // Just for clarity. llassert( (3 == src->getComponents()) || (4 == src->getComponents()) ); @@ -639,6 +1164,7 @@ void LLImageRaw::compositeUnscaled4onto3( LLImageRaw* src ) } } + void LLImageRaw::copyUnscaledAlphaMask( LLImageRaw* src, const LLColor4U& fill) { LLImageRaw* dst = this; // Just for clarity. 
@@ -846,6 +1372,12 @@ void LLImageRaw::copyScaled( LLImageRaw* src ) return; } + bilinear_scale( + src->getData(), src->getWidth(), src->getHeight(), src->getComponents(), src->getWidth()*src->getComponents() + , dst->getData(), dst->getWidth(), dst->getHeight(), dst->getComponents(), dst->getWidth()*dst->getComponents() + ); + + /* S32 temp_data_size = src->getWidth() * dst->getHeight() * getComponents(); llassert_always(temp_data_size > 0); std::vector<U8> temp_buffer(temp_data_size); @@ -861,6 +1393,7 @@ void LLImageRaw::copyScaled( LLImageRaw* src ) { copyLineScaled( &temp_buffer[0] + (getComponents() * src->getWidth() * row), dst->getData() + (getComponents() * dst->getWidth() * row), src->getWidth(), dst->getWidth(), 1, 1 ); } + */ } @@ -880,6 +1413,7 @@ BOOL LLImageRaw::scale( S32 new_width, S32 new_height, BOOL scale_image_data ) if (scale_image_data) { + /* S32 temp_data_size = old_width * new_height * getComponents(); llassert_always(temp_data_size > 0); std::vector<U8> temp_buffer(temp_data_size); @@ -899,6 +1433,19 @@ BOOL LLImageRaw::scale( S32 new_width, S32 new_height, BOOL scale_image_data ) { copyLineScaled( &temp_buffer[0] + (getComponents() * old_width * row), new_buffer + (getComponents() * new_width * row), old_width, new_width, 1, 1 ); } + */ + + S32 new_data_size = new_width * new_height * getComponents(); + llassert_always(new_data_size > 0); + + U8 *new_data = (U8*)ALLOCATE_MEM(LLImageBase::getPrivatePool(), new_data_size); + if(NULL == new_data) + { + return FALSE; + } + + bilinear_scale(getData(), old_width, old_height, getComponents(), old_width*getComponents(), new_data, new_width, new_height, getComponents(), new_width*getComponents()); + setDataAndSize(new_data, new_width, new_height, getComponents()); } else { diff --git a/indra/llimage/llimage.h b/indra/llimage/llimage.h index cd3f76f1fd..cd3f76f1fd 100755..100644 --- a/indra/llimage/llimage.h +++ b/indra/llimage/llimage.h diff --git a/indra/llimage/llimagebmp.cpp 
b/indra/llimage/llimagebmp.cpp index 8573fe0d91..a2ce2fee86 100755..100644 --- a/indra/llimage/llimagebmp.cpp +++ b/indra/llimage/llimagebmp.cpp @@ -443,6 +443,10 @@ BOOL LLImageBMP::decodeColorMask32( U8* dst, U8* src ) mBitfieldMask[2] = 0x000000FF; } + if (getWidth() * getHeight() * 4 > getDataSize() - mBitmapOffset) + { //here we have situation when data size in src less than actually needed + return FALSE; + } S32 src_row_span = getWidth() * 4; S32 alignment_bytes = (3 * src_row_span) % 4; // round up to nearest multiple of 4 @@ -476,6 +480,11 @@ BOOL LLImageBMP::decodeColorTable8( U8* dst, U8* src ) S32 src_row_span = getWidth() * 1; S32 alignment_bytes = (3 * src_row_span) % 4; // round up to nearest multiple of 4 + if ((getWidth() * getHeight()) + getHeight() * alignment_bytes > getDataSize() - mBitmapOffset) + { //here we have situation when data size in src less than actually needed + return FALSE; + } + for( S32 row = 0; row < getHeight(); row++ ) { for( S32 col = 0; col < getWidth(); col++ ) @@ -501,6 +510,11 @@ BOOL LLImageBMP::decodeTruecolor24( U8* dst, U8* src ) S32 src_row_span = getWidth() * 3; S32 alignment_bytes = (3 * src_row_span) % 4; // round up to nearest multiple of 4 + if ((getWidth() * getHeight() * 3) + getHeight() * alignment_bytes > getDataSize() - mBitmapOffset) + { //here we have situation when data size in src less than actually needed + return FALSE; + } + for( S32 row = 0; row < getHeight(); row++ ) { for( S32 col = 0; col < getWidth(); col++ ) diff --git a/indra/llimage/llimagebmp.h b/indra/llimage/llimagebmp.h index db0b45def0..db0b45def0 100755..100644 --- a/indra/llimage/llimagebmp.h +++ b/indra/llimage/llimagebmp.h diff --git a/indra/llimage/llimagedimensionsinfo.cpp b/indra/llimage/llimagedimensionsinfo.cpp index 5bf3f29b3c..5bf3f29b3c 100755..100644 --- a/indra/llimage/llimagedimensionsinfo.cpp +++ b/indra/llimage/llimagedimensionsinfo.cpp diff --git a/indra/llimage/llimagedimensionsinfo.h 
b/indra/llimage/llimagedimensionsinfo.h index 8f716c5d02..8f716c5d02 100755..100644 --- a/indra/llimage/llimagedimensionsinfo.h +++ b/indra/llimage/llimagedimensionsinfo.h diff --git a/indra/llimage/llimagedxt.cpp b/indra/llimage/llimagedxt.cpp index 04e0e752eb..04e0e752eb 100755..100644 --- a/indra/llimage/llimagedxt.cpp +++ b/indra/llimage/llimagedxt.cpp diff --git a/indra/llimage/llimagedxt.h b/indra/llimage/llimagedxt.h index a8756ba8ed..a8756ba8ed 100755..100644 --- a/indra/llimage/llimagedxt.h +++ b/indra/llimage/llimagedxt.h diff --git a/indra/llimage/llimagefilter.cpp b/indra/llimage/llimagefilter.cpp index 41adc7be9a..41adc7be9a 100755..100644 --- a/indra/llimage/llimagefilter.cpp +++ b/indra/llimage/llimagefilter.cpp diff --git a/indra/llimage/llimagefilter.h b/indra/llimage/llimagefilter.h index 16ec395f76..16ec395f76 100755..100644 --- a/indra/llimage/llimagefilter.h +++ b/indra/llimage/llimagefilter.h diff --git a/indra/llimage/llimagej2c.cpp b/indra/llimage/llimagej2c.cpp index 7cd59a2983..7cd59a2983 100755..100644 --- a/indra/llimage/llimagej2c.cpp +++ b/indra/llimage/llimagej2c.cpp diff --git a/indra/llimage/llimagej2c.h b/indra/llimage/llimagej2c.h index ce8195940d..ce8195940d 100755..100644 --- a/indra/llimage/llimagej2c.h +++ b/indra/llimage/llimagej2c.h diff --git a/indra/llimage/llimagejpeg.cpp b/indra/llimage/llimagejpeg.cpp index e419c77ff2..e419c77ff2 100755..100644 --- a/indra/llimage/llimagejpeg.cpp +++ b/indra/llimage/llimagejpeg.cpp diff --git a/indra/llimage/llimagejpeg.h b/indra/llimage/llimagejpeg.h index 2142660c81..2142660c81 100755..100644 --- a/indra/llimage/llimagejpeg.h +++ b/indra/llimage/llimagejpeg.h diff --git a/indra/llimage/llimagepng.cpp b/indra/llimage/llimagepng.cpp index 7735dc1379..7735dc1379 100755..100644 --- a/indra/llimage/llimagepng.cpp +++ b/indra/llimage/llimagepng.cpp diff --git a/indra/llimage/llimagepng.h b/indra/llimage/llimagepng.h index 1fbd850a2e..1fbd850a2e 100755..100644 --- 
a/indra/llimage/llimagepng.h +++ b/indra/llimage/llimagepng.h diff --git a/indra/llimage/llimagetga.cpp b/indra/llimage/llimagetga.cpp index 4eb8dc7440..d0ae105ba7 100755..100644 --- a/indra/llimage/llimagetga.cpp +++ b/indra/llimage/llimagetga.cpp @@ -437,7 +437,13 @@ BOOL LLImageTGA::decodeTruecolorNonRle( LLImageRaw* raw_image, BOOL &alpha_opaqu // Origin is the bottom left U8* dst = raw_image->getData(); U8* src = getData() + mDataOffset; + S32 pixels = getWidth() * getHeight(); + + if (pixels * (mIs15Bit ? 2 : getComponents()) > getDataSize() - mDataOffset) + { //here we have situation when data size in src less than actually needed + return FALSE; + } if (getComponents() == 4) { diff --git a/indra/llimage/llimagetga.h b/indra/llimage/llimagetga.h index 5da3525149..5da3525149 100755..100644 --- a/indra/llimage/llimagetga.h +++ b/indra/llimage/llimagetga.h diff --git a/indra/llimage/llimageworker.cpp b/indra/llimage/llimageworker.cpp index 4875fe7001..4875fe7001 100755..100644 --- a/indra/llimage/llimageworker.cpp +++ b/indra/llimage/llimageworker.cpp diff --git a/indra/llimage/llimageworker.h b/indra/llimage/llimageworker.h index 1bfb0ddfd3..1bfb0ddfd3 100755..100644 --- a/indra/llimage/llimageworker.h +++ b/indra/llimage/llimageworker.h diff --git a/indra/llimage/llmapimagetype.h b/indra/llimage/llmapimagetype.h index 0a040d3db9..0a040d3db9 100755..100644 --- a/indra/llimage/llmapimagetype.h +++ b/indra/llimage/llmapimagetype.h diff --git a/indra/llimage/llpngwrapper.cpp b/indra/llimage/llpngwrapper.cpp index aad139f570..aad139f570 100755..100644 --- a/indra/llimage/llpngwrapper.cpp +++ b/indra/llimage/llpngwrapper.cpp diff --git a/indra/llimage/llpngwrapper.h b/indra/llimage/llpngwrapper.h index 27d7df3bef..27d7df3bef 100755..100644 --- a/indra/llimage/llpngwrapper.h +++ b/indra/llimage/llpngwrapper.h diff --git a/indra/llimage/tests/llimageworker_test.cpp b/indra/llimage/tests/llimageworker_test.cpp index 51c5c63556..51c5c63556 100755..100644 --- 
a/indra/llimage/tests/llimageworker_test.cpp +++ b/indra/llimage/tests/llimageworker_test.cpp |