Diffstat (limited to 'indra')
-rwxr-xr-x  indra/llimage/llimage.cpp  580
1 files changed, 565 insertions, 15 deletions
diff --git a/indra/llimage/llimage.cpp b/indra/llimage/llimage.cpp
index d336eeaabc..2da9aabcd4 100755
--- a/indra/llimage/llimage.cpp
+++ b/indra/llimage/llimage.cpp
@@ -40,6 +40,548 @@
#include "llimagedxt.h"
#include "llmemory.h"
+#include <boost/static_assert.hpp>
+#include <boost/preprocessor.hpp>
+
+//..................................................................................
+//..................................................................................
+// Helper macros for generating loop-unrolling templates
+//..................................................................................
+#define _UNROL_GEN_TPL_arg_0(arg)
+#define _UNROL_GEN_TPL_arg_1(arg) arg
+
+#define _UNROL_GEN_TPL_comma_0
+#define _UNROL_GEN_TPL_comma_1 BOOST_PP_COMMA()
+//..................................................................................
+#define _UNROL_GEN_TPL_ARGS_macro(z,n,seq) \
+ BOOST_PP_CAT(_UNROL_GEN_TPL_arg_, BOOST_PP_MOD(n, 2))(BOOST_PP_SEQ_ELEM(n, seq)) BOOST_PP_CAT(_UNROL_GEN_TPL_comma_, BOOST_PP_AND(BOOST_PP_MOD(n, 2), BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), BOOST_PP_SEQ_SIZE(seq))))
+
+#define _UNROL_GEN_TPL_ARGS(seq) \
+ BOOST_PP_REPEAT(BOOST_PP_SEQ_SIZE(seq), _UNROL_GEN_TPL_ARGS_macro, seq)
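+//example (hand-traced expansion, for documentation only): the sequence
+//alternates (type)(name) pairs; the even elements - the types - are dropped:
+//  _UNROL_GEN_TPL_ARGS((S32 *)(cx)(S32 *)(comp)) -> cx, comp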
+//..................................................................................
+
+#define _UNROL_GEN_TPL_TYPE_ARGS_macro(z,n,seq) \
+ BOOST_PP_SEQ_ELEM(n, seq) BOOST_PP_CAT(_UNROL_GEN_TPL_comma_, BOOST_PP_AND(BOOST_PP_MOD(n, 2), BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), BOOST_PP_SEQ_SIZE(seq))))
+
+#define _UNROL_GEN_TPL_TYPE_ARGS(seq) \
+ BOOST_PP_REPEAT(BOOST_PP_SEQ_SIZE(seq), _UNROL_GEN_TPL_TYPE_ARGS_macro, seq)
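+//example (hand-traced expansion): every element is kept, so this forms the
+//parameter declaration list:
+//  _UNROL_GEN_TPL_TYPE_ARGS((S32 *)(cx)(S32 *)(comp)) -> S32 * cx, S32 * comp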
+//..................................................................................
+#define _UNROLL_GEN_TPL_foreach_ee(z, n, seq) \
+ executor<n>(_UNROL_GEN_TPL_ARGS(seq));
+
+#define _UNROLL_GEN_TPL(name, args_seq, operation, spec) \
+ template<> struct name<spec> { \
+ private: \
+ template<S32 _idx> inline void executor(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { \
+ BOOST_PP_SEQ_ENUM(operation) ; \
+ } \
+ public: \
+ inline void operator()(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { \
+ BOOST_PP_REPEAT(spec, _UNROLL_GEN_TPL_foreach_ee, args_seq) \
+ } \
+};
+//..................................................................................
+#define _UNROLL_GEN_TPL_foreach_seq_macro(r, data, elem) \
+ _UNROLL_GEN_TPL(BOOST_PP_SEQ_ELEM(0, data), BOOST_PP_SEQ_ELEM(1, data), BOOST_PP_SEQ_ELEM(2, data), elem)
+
+#define UNROLL_GEN_TPL(name, args_seq, operation, spec_seq) \
+ /*primary template - never instantiated; only the listed specializations are valid*/ \
+ template<U8 _ch> struct name { inline void operator()(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { BOOST_STATIC_ASSERT(_ch != _ch); } }; \
+ BOOST_PP_SEQ_FOR_EACH(_UNROLL_GEN_TPL_foreach_seq_macro, (name)(args_seq)(operation), spec_seq)
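+//Illustrative expansion (hand-traced sketch, not verbatim preprocessor output):
+//  UNROLL_GEN_TPL(uroll_zeroze_cx_comp, (S32 *)(cx)(S32 *)(comp),
+//                 (cx[_idx] = comp[_idx] = 0), (3))
+//yields, besides the never-instantiated primary template, roughly:
+//  template<> struct uroll_zeroze_cx_comp<3> {
+//  private:
+//      template<S32 _idx> inline void executor(S32 * cx, S32 * comp) { cx[_idx] = comp[_idx] = 0; }
+//  public:
+//      inline void operator()(S32 * cx, S32 * comp) { executor<0>(cx, comp); executor<1>(cx, comp); executor<2>(cx, comp); }
+//  };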
+//..................................................................................
+//..................................................................................
+
+
+//..................................................................................
+// Generated loop-unrolling templates with their specializations
+//..................................................................................
+//example: for(c = 0; c < ch; ++c) comp[c] = cx[c] = 0;
+UNROLL_GEN_TPL(uroll_zeroze_cx_comp, (S32 *)(cx)(S32 *)(comp), (cx[_idx] = comp[_idx] = 0), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) comp[c] >>= 4;
+UNROLL_GEN_TPL(uroll_comp_rshftasgn_constval, (S32 *)(comp)(const S32)(cval), (comp[_idx] >>= cval), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) comp[c] = (cx[c] >> 5) * yap;
+UNROLL_GEN_TPL(uroll_comp_asgn_cx_rshft_cval_all_mul_val, (S32 *)(comp)(S32 *)(cx)(const S32)(cval)(S32)(val), (comp[_idx] = (cx[_idx] >> cval) * val), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * Cy;
+UNROLL_GEN_TPL(uroll_comp_plusasgn_cx_rshft_cval_all_mul_val, (S32 *)(comp)(S32 *)(cx)(const S32)(cval)(S32)(val), (comp[_idx] += (cx[_idx] >> cval) * val), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) comp[c] += pix[c] * info.xapoints[x];
+UNROLL_GEN_TPL(uroll_inp_plusasgn_pix_mul_val, (S32 *)(comp)(const U8 *)(pix)(S32)(val), (comp[_idx] += pix[_idx] * val), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) cx[c] = pix[c] * info.xapoints[x];
+UNROLL_GEN_TPL(uroll_inp_asgn_pix_mul_val, (S32 *)(comp)(const U8 *)(pix)(S32)(val), (comp[_idx] = pix[_idx] * val), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) comp[c] = ((cx[c] * info.yapoints[y]) + (comp[c] * (256 - info.yapoints[y]))) >> 16;
+UNROLL_GEN_TPL(uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r, (S32 *)(comp)(S32 *)(cx)(S32)(apoint), (comp[_idx] = ((cx[_idx] * apoint) + (comp[_idx] * (256 - apoint))) >> 16), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) comp[c] = (comp[c] + pix[c] * info.yapoints[y]) >> 8;
+UNROLL_GEN_TPL(uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r, (S32 *)(comp)(const U8 *)(pix)(S32)(apoint), (comp[_idx] = (comp[_idx] + pix[_idx] * apoint) >> 8), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) comp[c] = ((comp[c]*(256 - info.xapoints[x])) + ((cx[c] * info.xapoints[x]))) >> 12;
+UNROLL_GEN_TPL(uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r, (S32 *)(comp)(S32)(apoint)(S32 *)(cx), (comp[_idx] = ((comp[_idx] * (256-apoint)) + (cx[_idx] * apoint)) >> 12), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) *dptr++ = comp[c]&0xff;
+UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_comp_and_ff, (U8 *&)(dptr)(S32 *)(comp), (*dptr++ = comp[_idx]&0xff), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) *dptr++ = (sptr[info.xpoints[x]*ch + c])&0xff;
+UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff, (U8 *&)(dptr)(const U8 *)(sptr)(S32)(apoint), (*dptr++ = sptr[apoint + _idx]&0xff), (1)(3)(4));
+//example: for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff;
+UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff, (U8 *&)(dptr)(S32 *)(comp)(const S32)(cval), (*dptr++ = (comp[_idx]>>cval)&0xff), (1)(3)(4));
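+//Usage sketch (illustrative): with the channel count as a compile-time constant,
+//  uroll_zeroze_cx_comp<4>()(cx, comp);
+//stands in for the runtime loop
+//  for(c = 0; c < 4; ++c) cx[c] = comp[c] = 0;
+//letting the compiler emit straight-line code for each supported channel count.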
+//..................................................................................
+
+
+template<U8 ch>
+struct scale_info
+{
+public:
+ std::vector<S32> xpoints;
+ std::vector<const U8*> ystrides;
+ std::vector<S32> xapoints, yapoints;
+ S32 xup_yup; // bit 0 set: dstW >= srcW (x upscale); bit 1 set: dstH >= srcH (y upscale)
+
+public:
+ //typedefs for the generated loop-unrolling functors
+ typedef uroll_zeroze_cx_comp<ch> uroll_zeroze_cx_comp_t;
+ typedef uroll_comp_rshftasgn_constval<ch> uroll_comp_rshftasgn_constval_t;
+ typedef uroll_comp_asgn_cx_rshft_cval_all_mul_val<ch> uroll_comp_asgn_cx_rshft_cval_all_mul_val_t;
+ typedef uroll_comp_plusasgn_cx_rshft_cval_all_mul_val<ch> uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t;
+ typedef uroll_inp_plusasgn_pix_mul_val<ch> uroll_inp_plusasgn_pix_mul_val_t;
+ typedef uroll_inp_asgn_pix_mul_val<ch> uroll_inp_asgn_pix_mul_val_t;
+ typedef uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r<ch> uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r_t;
+ typedef uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r<ch> uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t;
+ typedef uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r<ch> uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t;
+ typedef uroll_uref_dptr_inc_asgn_comp_and_ff<ch> uroll_uref_dptr_inc_asgn_comp_and_ff_t;
+ typedef uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff<ch> uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff_t;
+ typedef uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff<ch> uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t;
+
+public:
+ scale_info(const U8 *src, U32 srcW, U32 srcH, U32 dstW, U32 dstH, U32 srcStride)
+ : xup_yup((dstW >= srcW) + ((dstH >= srcH) << 1))
+ {
+ calc_x_points(srcW, dstW);
+ calc_y_strides(src, srcStride, srcH, dstH);
+ calc_aa_points(srcW, dstW, xup_yup&1, xapoints);
+ calc_aa_points(srcH, dstH, xup_yup&2, yapoints);
+ }
+
+private:
+ //...........................................................................................
+ void calc_x_points(U32 srcW, U32 dstW)
+ {
+ xpoints.resize(dstW+1);
+
+ S32 val = dstW >= srcW ? 0x8000 * srcW / dstW - 0x8000 : 0; // 16.16 fixed-point source x; biased when scaling up so pixel centres align
+ S32 inc = (srcW << 16) / dstW; // 16.16 fixed-point source step per destination column
+
+ for(U32 i = 0, j = 0; i < dstW; ++i, ++j, val += inc)
+ {
+ xpoints[j] = llmax(0, val >> 16);
+ }
+ }
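+ //worked example (illustrative): srcW = 4, dstW = 8 gives inc = 0x8000 and
+ //val starting at -0x4000, so xpoints = {0, 0, 0, 1, 1, 2, 2, 3}: each
+ //destination column maps to the source column left of its centre.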
+ //...........................................................................................
+ void calc_y_strides(const U8 *src, U32 srcStride, U32 srcH, U32 dstH)
+ {
+ ystrides.resize(dstH+1);
+
+ S32 val = dstH >= srcH ? 0x8000 * srcH / dstH - 0x8000 : 0; // same 16.16 fixed-point walk as calc_x_points, over rows
+ S32 inc = (srcH << 16) / dstH;
+
+ for(U32 i = 0, j = 0; i < dstH; ++i, ++j, val += inc)
+ {
+ ystrides[j] = src + llmax(0, val >> 16) * srcStride;
+ }
+ }
+ //...........................................................................................
+ void calc_aa_points(U32 srcSz, U32 dstSz, bool scale_up, std::vector<S32> &vp)
+ {
+ vp.resize(dstSz);
+
+ if(scale_up)
+ {
+ S32 val = 0x8000 * srcSz / dstSz - 0x8000;
+ S32 inc = (srcSz << 16) / dstSz;
+ S32 pos; // signed: val (and hence pos) can be negative for the first destination pixels
+
+ for(U32 i = 0, j = 0; i < dstSz; ++i, ++j, val += inc)
+ {
+ pos = val >> 16;
+
+ if (pos < 0)
+ vp[j] = 0;
+ else if (pos >= (srcSz - 1))
+ vp[j] = 0;
+ else
+ vp[j] = (val >> 8) & 0xff; // fractional source position as a 0..255 blend weight
+ }
+ }
+ else
+ {
+ S32 inc = (srcSz << 16) / dstSz;
+ S32 Cp = ((dstSz << 14) / srcSz) + 1; // per-source-pixel contribution weight, 2.14 fixed point
+ S32 ap;
+
+ for(U32 i = 0, j = 0, val = 0; i < dstSz; ++i, ++j, val += inc)
+ {
+ ap = ((0x100 - ((val >> 8) & 0xff)) * Cp) >> 8; // weight of the first, partially covered source pixel
+ vp[j] = ap | (Cp << 16); // pack Cp into the high 16 bits, ap into the low 16 (unpacked as >>16 and &0xffff in the scaling loops)
+ }
+ }
+ }
+};
+
+
+template<U8 ch>
+inline void bilinear_scale(
+ const U8 *src, U32 srcW, U32 srcH, U32 srcStride
+ , U8 *dst, U32 dstW, U32 dstH, U32 dstStride
+ )
+{
+ typedef scale_info<ch> scale_info_t;
+
+ scale_info_t info(src, srcW, srcH, dstW, dstH, srcStride);
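+
+ // four cases follow, selected by info.xup_yup:
+ // 3: x up / y up; 1: x up / y down; 2: x down / y up; 0: x down / y down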
+
+ const U8 *sptr;
+ U8 *dptr;
+ U32 x, y;
+ const U8 *pix;
+
+ S32 cx[ch], comp[ch]; // per-channel fixed-point accumulators
+
+
+ if(3 == info.xup_yup)
+ { //scale x/y - up
+ for(y = 0; y < dstH; ++y)
+ {
+ dptr = dst + (y * dstStride);
+ sptr = info.ystrides[y];
+
+ if(0 < info.yapoints[y])
+ {
+ for(x = 0; x < dstW; ++x)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] = comp[c] = 0;
+ scale_info_t::uroll_zeroze_cx_comp_t()(cx, comp);
+
+ if(0 < info.xapoints[x])
+ {
+ pix = info.ystrides[y] + info.xpoints[x] * ch;
+
+ //for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.xapoints[x]);
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256 - info.xapoints[x]);
+
+ pix += ch;
+
+ //for(c = 0; c < ch; ++c) comp[c] += pix[c] * info.xapoints[x];
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, info.xapoints[x]);
+
+ pix += srcStride;
+
+ //for(c = 0; c < ch; ++c) cx[c] = pix[c] * info.xapoints[x];
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, info.xapoints[x]);
+
+ pix -= ch;
+
+ //for(c = 0; c < ch; ++c) {
+ // cx[c] += pix[c] * (256 - info.xapoints[x]);
+ // comp[c] = ((cx[c] * info.yapoints[y]) + (comp[c] * (256 - info.yapoints[y]))) >> 16;
+ // *dptr++ = comp[c]&0xff;
+ //}
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, 256 - info.xapoints[x]);
+ scale_info_t::uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r_t()(comp, cx, info.yapoints[y]);
+ scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp);
+ }
+ else
+ {
+ pix = info.ystrides[y] + info.xpoints[x] * ch;
+
+ //for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.yapoints[y]);
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256-info.yapoints[y]);
+
+ pix += srcStride;
+
+ //for(c = 0; c < ch; ++c) {
+ // comp[c] = (comp[c] + pix[c] * info.yapoints[y]) >> 8;
+ // *dptr++ = comp[c]&0xff;
+ //}
+ scale_info_t::uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t()(comp, pix, info.yapoints[y]);
+ scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp);
+ }
+ }
+ }
+ else
+ {
+ for(x = 0; x < dstW; ++x)
+ {
+ if(0 < info.xapoints[x])
+ {
+ pix = info.ystrides[y] + info.xpoints[x] * ch;
+
+ //for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.xapoints[x]);
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256 - info.xapoints[x]);
+
+ pix += ch;
+
+ //for(c = 0; c < ch; ++c) {
+ // comp[c] = (comp[c] + pix[c] * info.xapoints[x]) >> 8;
+ // *dptr++ = comp[c]&0xff;
+ //}
+ scale_info_t::uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t()(comp, pix, info.xapoints[x]);
+ scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp);
+ }
+ else
+ {
+ //for(c = 0; c < ch; ++c) *dptr++ = (sptr[info.xpoints[x]*ch + c])&0xff;
+ scale_info_t::uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff_t()(dptr, sptr, info.xpoints[x]*ch);
+ }
+ }
+ }
+ }
+ }
+ else if(info.xup_yup == 1)
+ { //scaling down vertically
+ S32 Cy, j;
+ S32 yap;
+
+ for(y = 0; y < dstH; y++)
+ {
+ Cy = info.yapoints[y] >> 16;
+ yap = info.yapoints[y] & 0xffff;
+
+ dptr = dst + (y * dstStride);
+
+ for(x = 0; x < dstW; x++)
+ {
+ pix = info.ystrides[y] + info.xpoints[x] * ch;
+
+ //for(c = 0; c < ch; ++c) comp[c] = pix[c] * yap;
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, yap);
+
+ pix += srcStride;
+
+ for(j = (1 << 14) - yap; j > Cy; j -= Cy, pix += srcStride)
+ {
+ //for(c = 0; c < ch; ++c) comp[c] += pix[c] * Cy;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, Cy);
+ }
+
+ if(j > 0)
+ {
+ //for(c = 0; c < ch; ++c) comp[c] += pix[c] * j;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, j);
+ }
+
+ if(info.xapoints[x] > 0)
+ {
+ pix = info.ystrides[y] + info.xpoints[x]*ch + ch;
+ //for(c = 0; c < ch; ++c) cx[c] = pix[c] * yap;
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, yap);
+
+ pix += srcStride;
+ for(j = (1 << 14) - yap; j > Cy; j -= Cy)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cy;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cy);
+ pix += srcStride;
+ }
+
+ if(j > 0)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * j;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, j);
+ }
+
+ //for(c = 0; c < ch; ++c) comp[c] = ((comp[c]*(256 - info.xapoints[x])) + ((cx[c] * info.xapoints[x]))) >> 12;
+ scale_info_t::uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t()(comp, info.xapoints[x], cx);
+ }
+ else
+ {
+ //for(c = 0; c < ch; ++c) comp[c] >>= 4;
+ scale_info_t::uroll_comp_rshftasgn_constval_t()(comp, 4);
+ }
+
+ //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff;
+ scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 10);
+ }
+ }
+ }
+ else if(info.xup_yup == 2)
+ { // scaling down horizontally
+ S32 Cx, j;
+ S32 xap;
+
+ for(y = 0; y < dstH; y++)
+ {
+ dptr = dst + (y * dstStride);
+
+ for(x = 0; x < dstW; x++)
+ {
+ Cx = info.xapoints[x] >> 16;
+ xap = info.xapoints[x] & 0xffff;
+
+ pix = info.ystrides[y] + info.xpoints[x] * ch;
+
+ //for(c = 0; c < ch; ++c) comp[c] = pix[c] * xap;
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, xap);
+
+ pix+=ch;
+ for(j = (1 << 14) - xap; j > Cx; j -= Cx)
+ {
+ //for(c = 0; c < ch; ++c) comp[c] += pix[c] * Cx;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, Cx);
+ pix+=ch;
+ }
+
+ if(j > 0)
+ {
+ //for(c = 0; c < ch; ++c) comp[c] += pix[c] * j;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, j);
+ }
+
+ if(info.yapoints[y] > 0)
+ {
+ pix = info.ystrides[y] + info.xpoints[x]*ch + srcStride;
+ //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap;
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap);
+
+ pix+=ch;
+ for(j = (1 << 14) - xap; j > Cx; j -= Cx)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx);
+ pix+=ch;
+ }
+
+ if(j > 0)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * j;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, j);
+ }
+
+ //for(c = 0; c < ch; ++c) comp[c] = ((comp[c] * (256 - info.yapoints[y])) + ((cx[c] * info.yapoints[y]))) >> 12;
+ scale_info_t::uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t()(comp, info.yapoints[y], cx);
+ }
+ else
+ {
+ //for(c = 0; c < ch; ++c) comp[c] >>= 4;
+ scale_info_t::uroll_comp_rshftasgn_constval_t()(comp, 4);
+ }
+
+ //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff;
+ scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 10);
+ }
+ }
+ }
+ else
+ { //scale x/y - down
+ S32 Cx, Cy, i, j;
+ S32 xap, yap;
+
+ for(y = 0; y < dstH; y++)
+ {
+ Cy = info.yapoints[y] >> 16;
+ yap = info.yapoints[y] & 0xffff;
+
+ dptr = dst + (y * dstStride);
+ for(x = 0; x < dstW; x++)
+ {
+ Cx = info.xapoints[x] >> 16;
+ xap = info.xapoints[x] & 0xffff;
+
+ sptr = info.ystrides[y] + info.xpoints[x] * ch;
+ pix = sptr;
+ sptr += srcStride;
+
+ //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap;
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap);
+
+ pix+=ch;
+ for(i = (1 << 14) - xap; i > Cx; i -= Cx)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx);
+ pix+=ch;
+ }
+
+ if(i > 0)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i);
+ }
+
+ //for(c = 0; c < ch; ++c) comp[c] = (cx[c] >> 5) * yap;
+ scale_info_t::uroll_comp_asgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, yap);
+
+ for(j = (1 << 14) - yap; j > Cy; j -= Cy)
+ {
+ pix = sptr;
+ sptr += srcStride;
+
+ //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap;
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap);
+
+ pix+=ch;
+ for(i = (1 << 14) - xap; i > Cx; i -= Cx)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx);
+ pix+=ch;
+ }
+
+ if(i > 0)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i);
+ }
+
+ //for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * Cy;
+ scale_info_t::uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, Cy);
+ }
+
+ if(j > 0)
+ {
+ pix = sptr;
+ sptr += srcStride;
+
+ //for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap;
+ scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap);
+
+ pix+=ch;
+ for(i = (1 << 14) - xap; i > Cx; i -= Cx)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx);
+ pix+=ch;
+ }
+
+ if(i > 0)
+ {
+ //for(c = 0; c < ch; ++c) cx[c] += pix[c] * i;
+ scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i);
+ }
+
+ //for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * j;
+ scale_info_t::uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, j);
+ }
+
+ //for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>23)&0xff;
+ scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 23);
+ }
+ }
+ } //else
+}
+
+//wrapper: dispatches on the runtime channel count so the templated bilinear_scale is instantiated with a compile-time constant
+static void bilinear_scale(const U8 *src, U32 srcW, U32 srcH, U32 srcCh, U32 srcStride, U8 *dst, U32 dstW, U32 dstH, U32 dstCh, U32 dstStride)
+{
+ llassert(srcCh == dstCh);
+
+ switch(srcCh)
+ {
+ case 1:
+ bilinear_scale<1>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride);
+ break;
+ case 3:
+ bilinear_scale<3>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride);
+ break;
+ case 4:
+ bilinear_scale<4>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride);
+ break;
+ default:
+ llassert(!"Implement if need");
+ break;
+ }
+
+}
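+
+//Typical call (illustrative, assuming tightly packed rows - stride == width * channels):
+//  bilinear_scale(src, 64, 64, 4, 64*4, dst, 128, 128, 4, 128*4);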
+
//---------------------------------------------------------------------------
// LLImage
//---------------------------------------------------------------------------
@@ -559,6 +1101,7 @@ void LLImageRaw::composite( LLImageRaw* src )
}
}
+
// Src and dst can be any size. Src has 4 components. Dst has 3 components.
void LLImageRaw::compositeScaled4onto3(LLImageRaw* src)
{
@@ -589,21 +1132,6 @@ void LLImageRaw::compositeScaled4onto3(LLImageRaw* src)
// Src and dst are same size. Src has 4 components. Dst has 3 components.
void LLImageRaw::compositeUnscaled4onto3( LLImageRaw* src )
{
- /*
- //test fastFractionalMult()
- {
- U8 i = 255;
- U8 j = 255;
- do
- {
- do
- {
- llassert( fastFractionalMult(i, j) == (U8)(255*(i/255.f)*(j/255.f) + 0.5f) );
- } while( j-- );
- } while( i-- );
- }
- */
-
LLImageRaw* dst = this; // Just for clarity.
llassert( (3 == src->getComponents()) || (4 == src->getComponents()) );
@@ -639,6 +1167,7 @@ void LLImageRaw::compositeUnscaled4onto3( LLImageRaw* src )
}
}
+
void LLImageRaw::copyUnscaledAlphaMask( LLImageRaw* src, const LLColor4U& fill)
{
LLImageRaw* dst = this; // Just for clarity.
@@ -846,6 +1375,12 @@ void LLImageRaw::copyScaled( LLImageRaw* src )
return;
}
+ bilinear_scale(
+ src->getData(), src->getWidth(), src->getHeight(), src->getComponents(), src->getWidth()*src->getComponents()
+ , dst->getData(), dst->getWidth(), dst->getHeight(), dst->getComponents(), dst->getWidth()*dst->getComponents()
+ );
+
+ /*
S32 temp_data_size = src->getWidth() * dst->getHeight() * getComponents();
llassert_always(temp_data_size > 0);
std::vector<U8> temp_buffer(temp_data_size);
@@ -861,6 +1396,7 @@ void LLImageRaw::copyScaled( LLImageRaw* src )
{
copyLineScaled( &temp_buffer[0] + (getComponents() * src->getWidth() * row), dst->getData() + (getComponents() * dst->getWidth() * row), src->getWidth(), dst->getWidth(), 1, 1 );
}
+ */
}
@@ -880,6 +1416,7 @@ BOOL LLImageRaw::scale( S32 new_width, S32 new_height, BOOL scale_image_data )
if (scale_image_data)
{
+ /*
S32 temp_data_size = old_width * new_height * getComponents();
llassert_always(temp_data_size > 0);
std::vector<U8> temp_buffer(temp_data_size);
@@ -899,6 +1436,19 @@ BOOL LLImageRaw::scale( S32 new_width, S32 new_height, BOOL scale_image_data )
{
copyLineScaled( &temp_buffer[0] + (getComponents() * old_width * row), new_buffer + (getComponents() * new_width * row), old_width, new_width, 1, 1 );
}
+ */
+
+ S32 new_data_size = new_width * new_height * getComponents();
+ llassert_always(new_data_size > 0);
+
+ U8 *new_data = (U8*)ALLOCATE_MEM(LLImageBase::getPrivatePool(), new_data_size);
+ if(NULL == new_data)
+ {
+ return FALSE;
+ }
+
+ bilinear_scale(getData(), old_width, old_height, getComponents(), old_width*getComponents(), new_data, new_width, new_height, getComponents(), new_width*getComponents());
+ setDataAndSize(new_data, new_width, new_height, getComponents());
}
else
{