Diffstat (limited to 'indra/llimage')
-rwxr-xr-x	indra/llimage/llimage.cpp | 579
1 file changed, 564 insertions(+), 15 deletions(-)
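The diff below replaces the line-by-line copyLineScaled() resampling in LLImageRaw::copyScaled() and LLImageRaw::scale() with a templated bilinear scaler whose per-channel inner loops are unrolled at compile time via Boost.Preprocessor. As a minimal caller sketch of the new wrapper, assuming tightly packed rows (stride = width * components, which is how the call sites in the diff compute their strides); downscale_rgba is an illustrative name, not part of the patch, and U8/U32 are the viewer's stdtypes.h typedefs:

// Illustrative caller of the new bilinear_scale() wrapper, not part of the patch.
static void downscale_rgba(const U8 *src, U32 srcW, U32 srcH,
                           U8 *dst, U32 dstW, U32 dstH)
{
	// 4 channels, rows assumed unpadded, so stride = width * 4
	bilinear_scale(src, srcW, srcH, 4, srcW * 4,
	               dst, dstW, dstH, 4, dstW * 4);
}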
diff --git a/indra/llimage/llimage.cpp b/indra/llimage/llimage.cpp
index 16df27bb8e..7645034bd9 100755
--- a/indra/llimage/llimage.cpp
+++ b/indra/llimage/llimage.cpp
@@ -40,6 +40,547 @@
 #include "llimagedxt.h"
 #include "llmemory.h"
+#include <boost/preprocessor.hpp>
+
+//..................................................................................
+//..................................................................................
+// Helper macros for generating loop-unrolling templates
+//..................................................................................
+#define _UNROL_GEN_TPL_arg_0(arg)
+#define _UNROL_GEN_TPL_arg_1(arg) arg
+
+#define _UNROL_GEN_TPL_comma_0
+#define _UNROL_GEN_TPL_comma_1 BOOST_PP_COMMA()
+//..................................................................................
+#define _UNROL_GEN_TPL_ARGS_macro(z,n,seq) \
+	BOOST_PP_CAT(_UNROL_GEN_TPL_arg_, BOOST_PP_MOD(n, 2))(BOOST_PP_SEQ_ELEM(n, seq)) BOOST_PP_CAT(_UNROL_GEN_TPL_comma_, BOOST_PP_AND(BOOST_PP_MOD(n, 2), BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), BOOST_PP_SEQ_SIZE(seq))))
+
+#define _UNROL_GEN_TPL_ARGS(seq) \
+	BOOST_PP_REPEAT(BOOST_PP_SEQ_SIZE(seq), _UNROL_GEN_TPL_ARGS_macro, seq)
+//..................................................................................
+
+#define _UNROL_GEN_TPL_TYPE_ARGS_macro(z,n,seq) \
+	BOOST_PP_SEQ_ELEM(n, seq) BOOST_PP_CAT(_UNROL_GEN_TPL_comma_, BOOST_PP_AND(BOOST_PP_MOD(n, 2), BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), BOOST_PP_SEQ_SIZE(seq))))
+
+#define _UNROL_GEN_TPL_TYPE_ARGS(seq) \
+	BOOST_PP_REPEAT(BOOST_PP_SEQ_SIZE(seq), _UNROL_GEN_TPL_TYPE_ARGS_macro, seq)
+//..................................................................................
+#define _UNROLL_GEN_TPL_foreach_ee(z, n, seq) \
+	executor<n>(_UNROL_GEN_TPL_ARGS(seq));
+
+#define _UNROLL_GEN_TPL(name, args_seq, operation, spec) \
+	template<> struct name<spec> { \
+	private: \
+		template<S32 _idx> inline void executor(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { \
+			BOOST_PP_SEQ_ENUM(operation) ; \
+		} \
+	public: \
+		inline void operator()(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { \
+			BOOST_PP_REPEAT(spec, _UNROLL_GEN_TPL_foreach_ee, args_seq) \
+		} \
+};
+//..................................................................................
+#define _UNROLL_GEN_TPL_foreach_seq_macro(r, data, elem) \
+	_UNROLL_GEN_TPL(BOOST_PP_SEQ_ELEM(0, data), BOOST_PP_SEQ_ELEM(1, data), BOOST_PP_SEQ_ELEM(2, data), elem)
+
+#define UNROLL_GEN_TPL(name, args_seq, operation, spec_seq) \
+	/*primary template - should never be instantiated!*/ \
+	template<U8> struct name { inline void operator()(_UNROL_GEN_TPL_TYPE_ARGS(args_seq)) { /*static_assert(!"Should not be instantiated.");*/  } }; \
+	BOOST_PP_SEQ_FOR_EACH(_UNROLL_GEN_TPL_foreach_seq_macro, (name)(args_seq)(operation), spec_seq)
+//..................................................................................
+//..................................................................................
+
+
+//..................................................................................
+// Generated unrolling loop templates with specializations
+//..................................................................................
+//example: for(c = 0; c < ch; ++c) comp[c] = cx[0] = 0; +UNROLL_GEN_TPL(uroll_zeroze_cx_comp, (S32 *)(cx)(S32 *)(comp), (cx[_idx] = comp[_idx] = 0), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) comp[c] >>= 4; +UNROLL_GEN_TPL(uroll_comp_rshftasgn_constval, (S32 *)(comp)(const S32)(cval), (comp[_idx] >>= cval), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) comp[c] = (cx[c] >> 5) * yap; +UNROLL_GEN_TPL(uroll_comp_asgn_cx_rshft_cval_all_mul_val, (S32 *)(comp)(S32 *)(cx)(const S32)(cval)(S32)(val), (comp[_idx] = (cx[_idx] >> cval) * val), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * Cy; +UNROLL_GEN_TPL(uroll_comp_plusasgn_cx_rshft_cval_all_mul_val, (S32 *)(comp)(S32 *)(cx)(const S32)(cval)(S32)(val), (comp[_idx] += (cx[_idx] >> cval) * val), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) comp[c] += pix[c] * info.xapoints[x]; +UNROLL_GEN_TPL(uroll_inp_plusasgn_pix_mul_val, (S32 *)(comp)(const U8 *)(pix)(S32)(val), (comp[_idx] += pix[_idx] * val), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) cx[c] = pix[c] * info.xapoints[x]; +UNROLL_GEN_TPL(uroll_inp_asgn_pix_mul_val, (S32 *)(comp)(const U8 *)(pix)(S32)(val), (comp[_idx] = pix[_idx] * val), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) comp[c] = ((cx[c] * info.yapoints[y]) + (comp[c] * (256 - info.yapoints[y]))) >> 16; +UNROLL_GEN_TPL(uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r, (S32 *)(comp)(S32 *)(cx)(S32)(apoint), (comp[_idx] = ((cx[_idx] * apoint) + (comp[_idx] * (256 - apoint))) >> 16), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) comp[c] = (comp[c] + pix[c] * info.yapoints[y]) >> 8; +UNROLL_GEN_TPL(uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r, (S32 *)(comp)(const U8 *)(pix)(S32)(apoint), (comp[_idx] = (comp[_idx] + pix[_idx] * apoint) >> 8), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) comp[c] = ((comp[c]*(256 - info.xapoints[x])) + ((cx[c] * info.xapoints[x]))) >> 12; +UNROLL_GEN_TPL(uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r, (S32 *)(comp)(S32)(apoint)(S32 *)(cx), (comp[_idx] = ((comp[_idx] * (256-apoint)) + (cx[_idx] * apoint)) >> 12), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) *dptr++ = comp[c]&0xff; +UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_comp_and_ff, (U8 *&)(dptr)(S32 *)(comp), (*dptr++ = comp[_idx]&0xff), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) *dptr++ = (sptr[info.xpoints[x]*ch + c])&0xff; +UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff, (U8 *&)(dptr)(const U8 *)(sptr)(S32)(apoint), (*dptr++ = sptr[apoint + _idx]&0xff), (1)(3)(4)); +//example: for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff; +UNROLL_GEN_TPL(uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff, (U8 *&)(dptr)(S32 *)(comp)(const S32)(cval), (*dptr++ = (comp[_idx]>>cval)&0xff), (1)(3)(4)); +//.................................................................................. 
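To make the generated code concrete, the first instantiation above produces, for the three-channel case, something equivalent to the following. This is a hand expansion for illustration (whitespace added), not literal preprocessor output:

// Approximate expansion of uroll_zeroze_cx_comp for ch == 3: the channel loop
// becomes three inlined executor<N> calls instead of a runtime loop.
template<> struct uroll_zeroze_cx_comp<3>
{
private:
	template<S32 _idx> inline void executor(S32 *cx, S32 *comp)
	{
		cx[_idx] = comp[_idx] = 0;
	}
public:
	inline void operator()(S32 *cx, S32 *comp)
	{
		executor<0>(cx, comp);
		executor<1>(cx, comp);
		executor<2>(cx, comp);
	}
};

The trailing (1)(3)(4) sequence in each instantiation lists the channel counts for which such a specialization is emitted; any other channel count falls back to the empty primary template.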
+ + +template<U8 ch> +struct scale_info  +{ +public: +	std::vector<S32> xpoints; +	std::vector<const U8*> ystrides; +	std::vector<S32> xapoints, yapoints; +	S32 xup_yup; + +public: +	//unrolling loop types declaration +	typedef uroll_zeroze_cx_comp<ch>														uroll_zeroze_cx_comp_t; +	typedef uroll_comp_rshftasgn_constval<ch>												uroll_comp_rshftasgn_constval_t; +	typedef uroll_comp_asgn_cx_rshft_cval_all_mul_val<ch>									uroll_comp_asgn_cx_rshft_cval_all_mul_val_t; +	typedef uroll_comp_plusasgn_cx_rshft_cval_all_mul_val<ch>								uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t; +	typedef uroll_inp_plusasgn_pix_mul_val<ch>												uroll_inp_plusasgn_pix_mul_val_t; +	typedef uroll_inp_asgn_pix_mul_val<ch>													uroll_inp_asgn_pix_mul_val_t; +	typedef uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r<ch>		uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r_t; +	typedef uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r<ch>						uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t; +	typedef uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r<ch>		uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t; +	typedef uroll_uref_dptr_inc_asgn_comp_and_ff<ch>										uroll_uref_dptr_inc_asgn_comp_and_ff_t; +	typedef uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff<ch>						uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff_t; +	typedef uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff<ch>								uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t; + +public: +	scale_info(const U8 *src, U32 srcW, U32 srcH, U32 dstW, U32 dstH, U32 srcStride) +		: xup_yup((dstW >= srcW) + ((dstH >= srcH) << 1)) +	{ +		calc_x_points(srcW, dstW); +		calc_y_strides(src, srcStride, srcH, dstH); +		calc_aa_points(srcW, dstW, xup_yup&1, xapoints); +		calc_aa_points(srcH, dstH, xup_yup&2, yapoints); +	} + +private: +	//........................................................................................... +	void calc_x_points(U32 srcW, U32 dstW) +	{ +		xpoints.resize(dstW+1); + +		S32 val = dstW >= srcW ? 0x8000 * srcW / dstW - 0x8000 : 0; +		S32 inc = (srcW << 16) / dstW; + +		for(U32 i = 0, j = 0; i < dstW; ++i, ++j, val += inc) +		{ +			xpoints[j] = llmax(0, val >> 16); +		} +	} +	//........................................................................................... +	void calc_y_strides(const U8 *src, U32 srcStride, U32 srcH, U32 dstH) +	{ +		ystrides.resize(dstH+1); + +		S32 val = dstH >= srcH ? 0x8000 * srcH / dstH - 0x8000 : 0; +		S32 inc = (srcH << 16) / dstH; + +		for(U32 i = 0, j = 0; i < dstH; ++i, ++j, val += inc) +		{ +			ystrides[j] = src + llmax(0, val >> 16) * srcStride; +		} +	} +	//........................................................................................... 
+	void calc_aa_points(U32 srcSz, U32 dstSz, bool scale_up, std::vector<S32> &vp) +	{ +		vp.resize(dstSz); + +		if(scale_up) +		{ +			S32 val = 0x8000 * srcSz / dstSz - 0x8000; +			S32 inc = (srcSz << 16) / dstSz; +			U32 pos; + +			for(U32 i = 0, j = 0; i < dstSz; ++i, ++j, val += inc) +			{ +				pos = val >> 16; + +				if (pos < 0) +					vp[j] = 0; +				else if (pos >= (srcSz - 1)) +					vp[j] = 0; +				else +					vp[j] = (val >> 8) - ((val >> 8) & 0xffffff00); +			} +		} +		else +		{  +			S32 inc = (srcSz << 16) / dstSz; +			S32 Cp = ((dstSz << 14) / srcSz) + 1; +			S32 ap; + +			for(U32 i = 0, j = 0, val = 0; i < dstSz; ++i, ++j, val += inc) +			{ +				ap = ((0x100 - ((val >> 8) & 0xff)) * Cp) >> 8; +				vp[j] = ap | (Cp << 16); +			} +		} +	} +}; + + +template<U8 ch> +inline void bilinear_scale( +	const U8 *src, U32 srcW, U32 srcH, U32 srcStride +	, U8 *dst, U32 dstW, U32 dstH, U32 dstStride +	) +{ +	typedef scale_info<ch> scale_info_t; + +	scale_info_t info(src, srcW, srcH, dstW, dstH, srcStride); + +	const U8 *sptr; +	U8 *dptr; +	U32 x, y; +	const U8 *pix; + +	S32 cx[ch], comp[ch]; + + +	if(3 == info.xup_yup) +	{ //scale x/y - up +		for(y = 0; y < dstH; ++y) +		{ +			dptr = dst + (y * dstStride); +			sptr = info.ystrides[y]; + +			if(0 < info.yapoints[y]) +			{ +				for(x = 0; x < dstW; ++x) +				{ +					//for(c = 0; c < ch; ++c) cx[c] = comp[c] = 0; +					typename scale_info_t::uroll_zeroze_cx_comp_t()(cx, comp); + +					if(0 < info.xapoints[x]) +					{ +						pix = info.ystrides[y] + info.xpoints[x] * ch; + +						//for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.xapoints[x]); +						typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256 - info.xapoints[x]); + +						pix += ch; + +						//for(c = 0; c < ch; ++c) comp[c] += pix[c] * info.xapoints[x]; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, info.xapoints[x]); + +						pix += srcStride; + +						//for(c = 0; c < ch; ++c) cx[c] = pix[c] * info.xapoints[x]; +						typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, info.xapoints[x]); + +						pix -= ch; + +						//for(c = 0; c < ch; ++c) {  +						//	cx[c] += pix[c] * (256 - info.xapoints[x]); +						//	comp[c] = ((cx[c] * info.yapoints[y]) + (comp[c] * (256 - info.yapoints[y]))) >> 16; +						//	*dptr++ = comp[c]&0xff; +						//} +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, 256 - info.xapoints[x]); +						typename scale_info_t::uroll_comp_asgn_cx_mul_apoint_plus_comp_mul_inv_apoint_allshifted_16_r_t()(comp, cx, info.yapoints[y]); +						typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp); +					} +					else +					{ +						pix = info.ystrides[y] + info.xpoints[x] * ch; + +						//for(c = 0; c < ch; ++c) comp[c] = pix[c] * (256 - info.yapoints[y]); +						typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256-info.yapoints[y]); + +						pix += srcStride; + +						//for(c = 0; c < ch; ++c) {  +						//	comp[c] = (comp[c] + pix[c] * info.yapoints[y]) >> 8; +						//	*dptr++ = comp[c]&0xff; +						//} +						typename scale_info_t::uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t()(comp, pix, info.yapoints[y]); +						typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp); +					} +				} +			} +			else +			{ +				for(x = 0; x < dstW; ++x) +				{ +					if(0 < info.xapoints[x]) +					{ +						pix = info.ystrides[y] + info.xpoints[x] * ch; + +						//for(c = 0; c < ch; ++c) { +						//	comp[c] = pix[c] * (256 - info.xapoints[x]); +		
				//	comp[c] = (comp[c] + pix[c] * info.xapoints[x]) >> 8; +						//	*dptr++ = comp[c]&0xff; +						//} +						typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, 256 - info.xapoints[x]); +						typename scale_info_t::uroll_comp_asgn_comp_plus_pix_mul_apoint_allshifted_8_r_t()(comp, pix, info.xapoints[x]); +						typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_and_ff_t()(dptr, comp); +					} +					else  +					{ +						//for(c = 0; c < ch; ++c) *dptr++ = (sptr[info.xpoints[x]*ch + c])&0xff; +						typename scale_info_t::uroll_uref_dptr_inc_asgn_sptr_apoint_plus_idx_alland_ff_t()(dptr, sptr, info.xpoints[x]*ch); +					} +				} +			} +		} +	} +	else if(info.xup_yup == 1) +	{ //scaling down vertically +		S32 Cy, j; +		S32 yap; + +		for(y = 0; y < dstH; y++) +		{ +			Cy = info.yapoints[y] >> 16; +			yap = info.yapoints[y] & 0xffff; + +			dptr = dst + (y * dstStride); + +			for(x = 0; x < dstW; x++) +			{ +				pix = info.ystrides[y] + info.xpoints[x] * ch; + +				//for(c = 0; c < ch; ++c) comp[c] = pix[c] * yap; +				typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, yap); + +				pix += srcStride; + +				for(j = (1 << 14) - yap; j > Cy; j -= Cy, pix += srcStride) +				{ +					//for(c = 0; c < ch; ++c) comp[c] += pix[c] * Cy; +					typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, Cy); +				} + +				if(j > 0) +				{ +					//for(c = 0; c < ch; ++c) comp[c] += pix[c] * j; +					typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, j); +				} + +				if(info.xapoints[x] > 0) +				{ +					pix = info.ystrides[y] + info.xpoints[x]*ch + ch; +					//for(c = 0; c < ch; ++c) cx[c] = pix[c] * yap; +					typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, yap); + +					pix += srcStride; +					for(j = (1 << 14) - yap; j > Cy; j -= Cy) +					{ +						//for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cy; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cy); +						pix += srcStride; +					} + +					if(j > 0) +					{ +						//for(c = 0; c < ch; ++c) cx[c] += pix[c] * j; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, j); +					} + +					//for(c = 0; c < ch; ++c) comp[c] = ((comp[c]*(256 - info.xapoints[x])) + ((cx[c] * info.xapoints[x]))) >> 12; +					typename scale_info_t::uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t()(comp, info.xapoints[x], cx); +				} +				else +				{ +					//for(c = 0; c < ch; ++c) comp[c] >>= 4; +					typename scale_info_t::uroll_comp_rshftasgn_constval_t()(comp, 4); +				} + +				//for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff; +				typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 10); +			} +		} +	} +	else if(info.xup_yup == 2) +	{ // scaling down horizontally +		S32 Cx, j; +		S32 xap; + +		for(y = 0; y < dstH; y++) +		{ +			dptr = dst + (y * dstStride); + +			for(x = 0; x < dstW; x++) +			{ +				Cx = info.xapoints[x] >> 16; +				xap = info.xapoints[x] & 0xffff; + +				pix = info.ystrides[y] + info.xpoints[x] * ch; + +				//for(c = 0; c < ch; ++c) comp[c] = pix[c] * xap; +				typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(comp, pix, xap); + +				pix+=ch; +				for(j = (1 << 14) - xap; j > Cx; j -= Cx) +				{ +					//for(c = 0; c < ch; ++c) comp[c] += pix[c] * Cx; +					typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, Cx); +					pix+=ch; +				} + +				if(j > 0) +				{ +					//for(c = 0; c < ch; ++c) comp[c] += pix[c] * j; +					typename 
scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(comp, pix, j); +				} + +				if(info.yapoints[y] > 0) +				{ +					pix = info.ystrides[y] + info.xpoints[x]*ch + srcStride; +					//for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap; +					typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap); + +					pix+=ch; +					for(j = (1 << 14) - xap; j > Cx; j -= Cx) +					{ +						//for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx); +						pix+=ch; +					} + +					if(j > 0) +					{ +						//for(c = 0; c < ch; ++c) cx[c] += pix[c] * j; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, j); +					} + +					//for(c = 0; c < ch; ++c) comp[c] = ((comp[c] * (256 - info.yapoints[y])) + ((cx[c] * info.yapoints[y]))) >> 12; +					typename scale_info_t::uroll_comp_asgn_comp_mul_inv_apoint_plus_cx_mul_apoint_allshifted_12_r_t()(comp, info.yapoints[y], cx); +				} +				else +				{ +					//for(c = 0; c < ch; ++c) comp[c] >>= 4; +					typename scale_info_t::uroll_comp_rshftasgn_constval_t()(comp, 4); +				} + +				//for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>10)&0xff; +				typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 10); +			} +		} +	} +	else  +	{ //scale x/y - down +		S32 Cx, Cy, i, j; +		S32 xap, yap; + +		for(y = 0; y < dstH; y++) +		{ +			Cy = info.yapoints[y] >> 16; +			yap = info.yapoints[y] & 0xffff; + +			dptr = dst + (y * dstStride); +			for(x = 0; x < dstW; x++) +			{ +				Cx = info.xapoints[x] >> 16; +				xap = info.xapoints[x] & 0xffff; + +				sptr = info.ystrides[y] + info.xpoints[x] * ch; +				pix = sptr; +				sptr += srcStride; + +				//for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap; +				typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap); + +				pix+=ch; +				for(i = (1 << 14) - xap; i > Cx; i -= Cx) +				{ +					//for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx; +					typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx); +					pix+=ch; +				} + +				if(i > 0) +				{ +					//for(c = 0; c < ch; ++c) cx[c] += pix[c] * i; +					typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i); +				} + +				//for(c = 0; c < ch; ++c) comp[c] = (cx[c] >> 5) * yap; +				typename scale_info_t::uroll_comp_asgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, yap); + +				for(j = (1 << 14) - yap; j > Cy; j -= Cy) +				{ +					pix = sptr; +					sptr += srcStride; + +					//for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap; +					typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap); + +					pix+=ch; +					for(i = (1 << 14) - xap; i > Cx; i -= Cx) +					{ +						//for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, Cx); +						pix+=ch; +					} + +					if(i > 0) +					{ +						//for(c = 0; c < ch; ++c) cx[c] += pix[c] * i; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i); +					} + +					//for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * Cy; +					typename scale_info_t::uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, Cy); +				} + +				if(j > 0) +				{ +					pix = sptr; +					sptr += srcStride; + +					//for(c = 0; c < ch; ++c) cx[c] = pix[c] * xap; +					typename scale_info_t::uroll_inp_asgn_pix_mul_val_t()(cx, pix, xap); + +					pix+=ch; +					for(i = (1 << 14) - xap; i > Cx; i -= Cx) +					{ +						//for(c = 0; c < ch; ++c) cx[c] += pix[c] * Cx; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, 
pix, Cx); +						pix+=ch; +					} + +					if(i > 0) +					{ +						//for(c = 0; c < ch; ++c) cx[c] += pix[c] * i; +						typename scale_info_t::uroll_inp_plusasgn_pix_mul_val_t()(cx, pix, i); +					} + +					//for(c = 0; c < ch; ++c) comp[c] += (cx[c] >> 5) * j; +					typename scale_info_t::uroll_comp_plusasgn_cx_rshft_cval_all_mul_val_t()(comp, cx, 5, j); +				} + +				//for(c = 0; c < ch; ++c) *dptr++ = (comp[c]>>23)&0xff; +				typename scale_info_t::uroll_uref_dptr_inc_asgn_comp_rshft_cval_and_ff_t()(dptr, comp, 23); +			} +		} +	} //else +} + +//wrapper +static void bilinear_scale(const U8 *src, U32 srcW, U32 srcH, U32 srcCh, U32 srcStride, U8 *dst, U32 dstW, U32 dstH, U32 dstCh, U32 dstStride) +{ +	llassert(srcCh == dstCh); + +	switch(srcCh) +	{ +	case 1: +		bilinear_scale<1>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride); +		break; +	case 3: +		bilinear_scale<3>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride); +		break; +	case 4: +		bilinear_scale<4>(src, srcW, srcH, srcStride, dst, dstW, dstH, dstStride); +		break; +	default: +		llassert(!"Implement if need"); +		break; +	} + +} +  //---------------------------------------------------------------------------  // LLImage  //--------------------------------------------------------------------------- @@ -559,6 +1100,7 @@ void LLImageRaw::composite( LLImageRaw* src )  	}  } +  // Src and dst can be any size.  Src has 4 components.  Dst has 3 components.  void LLImageRaw::compositeScaled4onto3(LLImageRaw* src)  { @@ -589,21 +1131,6 @@ void LLImageRaw::compositeScaled4onto3(LLImageRaw* src)  // Src and dst are same size.  Src has 4 components.  Dst has 3 components.  void LLImageRaw::compositeUnscaled4onto3( LLImageRaw* src )  { -	/* -	//test fastFractionalMult() -	{ -		U8 i = 255; -		U8 j = 255; -		do -		{ -			do -			{ -				llassert( fastFractionalMult(i, j) == (U8)(255*(i/255.f)*(j/255.f) + 0.5f) ); -			} while( j-- ); -		} while( i-- ); -	} -	*/ -  	LLImageRaw* dst = this;  // Just for clarity.  	llassert( (3 == src->getComponents()) || (4 == src->getComponents()) ); @@ -639,6 +1166,7 @@ void LLImageRaw::compositeUnscaled4onto3( LLImageRaw* src )  	}  } +  void LLImageRaw::copyUnscaledAlphaMask( LLImageRaw* src, const LLColor4U& fill)  {  	LLImageRaw* dst = this;  // Just for clarity. 
@@ -846,6 +1374,12 @@ void LLImageRaw::copyScaled( LLImageRaw* src )  		return;  	} +	bilinear_scale( +			src->getData(), src->getWidth(), src->getHeight(), src->getComponents(), src->getWidth()*src->getComponents() +		,	dst->getData(), dst->getWidth(), dst->getHeight(), dst->getComponents(), dst->getWidth()*dst->getComponents() +	); + +	/*  	S32 temp_data_size = src->getWidth() * dst->getHeight() * getComponents();  	llassert_always(temp_data_size > 0);  	std::vector<U8> temp_buffer(temp_data_size); @@ -861,6 +1395,7 @@ void LLImageRaw::copyScaled( LLImageRaw* src )  	{  		copyLineScaled( &temp_buffer[0] + (getComponents() * src->getWidth() * row), dst->getData() + (getComponents() * dst->getWidth() * row), src->getWidth(), dst->getWidth(), 1, 1 );  	} +	*/  } @@ -880,6 +1415,7 @@ BOOL LLImageRaw::scale( S32 new_width, S32 new_height, BOOL scale_image_data )  	if (scale_image_data)  	{ +		/*  		S32 temp_data_size = old_width * new_height * getComponents();  		llassert_always(temp_data_size > 0);  		std::vector<U8> temp_buffer(temp_data_size); @@ -899,6 +1435,19 @@ BOOL LLImageRaw::scale( S32 new_width, S32 new_height, BOOL scale_image_data )  		{  			copyLineScaled( &temp_buffer[0] + (getComponents() * old_width * row), new_buffer + (getComponents() * new_width * row), old_width, new_width, 1, 1 );  		} +		*/ + +		S32 new_data_size = new_width * new_height * getComponents(); +		llassert_always(new_data_size > 0); + +		U8 *new_data = (U8*)ALLOCATE_MEM(LLImageBase::getPrivatePool(), new_data_size);  +		if(NULL == new_data)  +		{ +			return FALSE;  +		} + +		bilinear_scale(getData(), old_width, old_height, getComponents(), old_width*getComponents(), new_data, new_width, new_height, getComponents(), new_width*getComponents()); +		setDataAndSize(new_data, new_width, new_height, getComponents());   	}  	else  	{ | 
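One caveat when reusing scale_info::calc_aa_points() from this patch: in its upscale branch pos is declared U32, so the pos < 0 clamp can never fire even though val starts out negative. Below is a standalone sketch of that branch with pos held signed, assuming the clamp is the intent; <vector> and the LL integer typedefs are assumed to be in scope, and the helper name is illustrative only:

// Upscale branch of calc_aa_points(), restated with a signed `pos` so the
// lower-bound clamp works.  vp[j] is the 0..255 blend weight toward the next
// source texel; (val >> 8) & 0xff equals the original's
// (val >> 8) - ((val >> 8) & 0xffffff00).
static void calc_aa_points_up(U32 srcSz, U32 dstSz, std::vector<S32> &vp)
{
	vp.resize(dstSz);

	S32 val = S32(0x8000 * srcSz / dstSz) - 0x8000;	// negative start bias centres the samples
	S32 inc = (srcSz << 16) / dstSz;				// 16.16 step per destination texel

	for (U32 j = 0; j < dstSz; ++j, val += inc)
	{
		S32 pos = val >> 16;						// whole source texel index
		if (pos < 0 || pos >= S32(srcSz) - 1)
			vp[j] = 0;								// no blending at the edges
		else
			vp[j] = (val >> 8) & 0xff;				// fractional part as blend weight
	}
}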

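For completeness, the mapping implemented by both calc_x_points() and calc_y_strides() can be restated as a single pure function: the clamped source row/column index for destination index i, computed in 16.16 fixed point. Again an illustrative restatement, not code from the patch; llmax and the integer typedefs come from the viewer's common headers:

// 16.16 fixed-point source index for destination index i, as computed by
// scale_info::calc_x_points()/calc_y_strides().  When scaling up, the start
// value is biased negative to centre the samples, hence the clamp to zero.
static S32 source_index(U32 i, U32 srcSz, U32 dstSz)
{
	S32 val = (dstSz >= srcSz) ? S32(0x8000 * srcSz / dstSz) - 0x8000 : 0;
	S32 inc = (srcSz << 16) / dstSz;
	return llmax(0, (val + S32(i) * inc) >> 16);
}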