Diffstat (limited to 'indra/llmath')
62 files changed, 6939 insertions, 1813 deletions
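Editor's note: among other things, this change introduces the LLCalc expression evaluator (llcalc.h/.cpp, llcalcparser.h/.cpp). The following is a minimal usage sketch, not part of the patch; it is assembled from the API declared in the diff below (getInstance, setVar, evalString, getLastErrorPos, and the "PX" variable name behind LLCalc::X_POS). The function name example_eval and the literal expression are illustrative assumptions.

```cpp
// Illustrative sketch only -- exercises the LLCalc API added by this patch.
// Assumes the viewer's usual include paths and the F32 typedef from the
// Linden common headers.
#include "llcalc.h"

void example_eval()
{
    // LLCalc is a singleton ("There shall be only one").
    LLCalc* calc = LLCalc::getInstance();

    // Seed a build-floater variable so expressions can reference it by name.
    // LLCalc::X_POS is the string "PX" per llcalc.cpp.
    calc->setVar(LLCalc::X_POS, 128.5f);

    // evalString() upper-cases the input before parsing, so lower-case
    // identifiers and function names are accepted as well. Trig functions
    // take degrees per the parser's semantic actions.
    F32 result = 0.f;
    if (calc->evalString("PX + SIN(45) * 2", result))
    {
        // result now holds the evaluated value.
    }
    else
    {
        // On failure, getLastErrorPos() reports the character offset of the
        // syntax or domain error within the expression.
        std::string::size_type err_pos = calc->getLastErrorPos();
        (void) err_pos;
    }
}
```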
| diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index e93fe90650..b5e59c1ca3 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -12,16 +12,21 @@ include_directories(  set(llmath_SOURCE_FILES      llbbox.cpp      llbboxlocal.cpp +    llcalc.cpp +    llcalcparser.cpp      llcamera.cpp      llcoordframe.cpp      llline.cpp +    llmatrix3a.cpp      llmodularmath.cpp      llperlin.cpp      llquaternion.cpp      llrect.cpp      llsphere.cpp +    llvector4a.cpp      llvolume.cpp      llvolumemgr.cpp +    llvolumeoctree.cpp      llsdutil_math.cpp      m3math.cpp      m4math.cpp @@ -43,27 +48,36 @@ set(llmath_HEADER_FILES      coordframe.h      llbbox.h      llbboxlocal.h +    llcalc.h +    llcalcparser.h      llcamera.h      llcoord.h      llcoordframe.h      llinterp.h      llline.h      llmath.h +    llmatrix3a.h +    llmatrix3a.inl      llmodularmath.h      lloctree.h      llperlin.h      llplane.h      llquantize.h      llquaternion.h +    llquaternion2.h +    llquaternion2.inl      llrect.h +    llsimdmath.h +    llsimdtypes.h +    llsimdtypes.inl      llsphere.h      lltreenode.h -    llv4math.h -    llv4matrix3.h -    llv4matrix4.h -    llv4vector3.h +    llvector4a.h +    llvector4a.inl +    llvector4logical.h      llvolume.h      llvolumemgr.h +    llvolumeoctree.h      llsdutil_math.h      m3math.h      m4math.h diff --git a/indra/llmath/llbbox.cpp b/indra/llmath/llbbox.cpp index b46a6e03d2..3e2c05a6e6 100644 --- a/indra/llmath/llbbox.cpp +++ b/indra/llmath/llbbox.cpp @@ -89,6 +89,19 @@ void LLBBox::addBBoxAgent(const LLBBox& b)  	}  } +LLBBox LLBBox::getAxisAligned() const +{ +	// no rotation = axis aligned rotation +	LLBBox aligned(mPosAgent, LLQuaternion(), LLVector3(), LLVector3()); + +	// add the center point so that it's not empty +	aligned.addPointAgent(mPosAgent); + +	// add our BBox +	aligned.addBBoxAgent(*this); + +	return aligned; +}  void LLBBox::expand( F32 delta )  { @@ -147,6 +160,15 @@ BOOL LLBBox::containsPointAgent(const LLVector3& p) const  	return containsPointLocal(point_local);  } +LLVector3 LLBBox::getMinAgent() const +{ +	return localToAgent(mMinLocal); +} + +LLVector3 LLBBox::getMaxAgent() const +{ +	return localToAgent(mMaxLocal); +}  /*  LLBBox operator*(const LLBBox &a, const LLMatrix4 &b) diff --git a/indra/llmath/llbbox.h b/indra/llmath/llbbox.h index 5b911793f0..28e69b75e1 100644 --- a/indra/llmath/llbbox.h +++ b/indra/llmath/llbbox.h @@ -51,9 +51,11 @@ public:  	const LLVector3&	getPositionAgent() const			{ return mPosAgent; }  	const LLQuaternion&	getRotation() const					{ return mRotation; } +	LLVector3           getMinAgent() const;  	const LLVector3&	getMinLocal() const					{ return mMinLocal; }  	void				setMinLocal( const LLVector3& min )	{ mMinLocal = min; } +	LLVector3           getMaxAgent() const;  	const LLVector3&	getMaxLocal() const					{ return mMaxLocal; }  	void				setMaxLocal( const LLVector3& max )	{ mMaxLocal = max; } @@ -80,6 +82,8 @@ public:  	LLVector3			localToAgentBasis(const LLVector3& v) const;  	LLVector3			agentToLocalBasis(const LLVector3& v) const; +	// Get the smallest possible axis aligned bbox that contains this bbox +	LLBBox              getAxisAligned() const;  //	friend LLBBox operator*(const LLBBox& a, const LLMatrix4& b); diff --git a/indra/llmath/llcalc.cpp b/indra/llmath/llcalc.cpp new file mode 100644 index 0000000000..1b2d609b67 --- /dev/null +++ b/indra/llmath/llcalc.cpp @@ -0,0 +1,162 @@ +/* + *  LLCalc.cpp + * Copyright 2008 Aimee Walton. 
+ * $LicenseInfo:firstyear=2008&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2008, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + * + */ + +#include "linden_common.h" + +#include "llcalc.h" + +#include "llcalcparser.h" +#include "llmath.h" + + +// Variable names for use in the build floater +const char* LLCalc::X_POS = "PX"; +const char* LLCalc::Y_POS = "PY"; +const char* LLCalc::Z_POS = "PZ"; +const char* LLCalc::X_SCALE = "SX"; +const char* LLCalc::Y_SCALE = "SY"; +const char* LLCalc::Z_SCALE = "SZ"; +const char* LLCalc::X_ROT = "RX"; +const char* LLCalc::Y_ROT = "RY"; +const char* LLCalc::Z_ROT = "RZ"; +const char* LLCalc::HOLLOW = "HLW"; +const char* LLCalc::CUT_BEGIN = "CB"; +const char* LLCalc::CUT_END = "CE"; +const char* LLCalc::PATH_BEGIN = "PB"; +const char* LLCalc::PATH_END = "PE"; +const char* LLCalc::TWIST_BEGIN = "TB"; +const char* LLCalc::TWIST_END = "TE"; +const char* LLCalc::X_SHEAR = "SHX"; +const char* LLCalc::Y_SHEAR = "SHY"; +const char* LLCalc::X_TAPER = "TPX"; +const char* LLCalc::Y_TAPER = "TPY"; +const char* LLCalc::RADIUS_OFFSET = "ROF"; +const char* LLCalc::REVOLUTIONS = "REV"; +const char* LLCalc::SKEW = "SKW"; +const char* LLCalc::X_HOLE = "HLX"; +const char* LLCalc::Y_HOLE = "HLY"; +const char* LLCalc::TEX_U_SCALE = "TSU"; +const char* LLCalc::TEX_V_SCALE = "TSV"; +const char* LLCalc::TEX_U_OFFSET = "TOU"; +const char* LLCalc::TEX_V_OFFSET = "TOV"; +const char* LLCalc::TEX_ROTATION = "TROT"; +const char* LLCalc::TEX_TRANSPARENCY = "TRNS"; +const char* LLCalc::TEX_GLOW = "GLOW"; + + +LLCalc* LLCalc::sInstance = NULL; + +LLCalc::LLCalc() : mLastErrorPos(0) +{ +	// Init table of constants +	mConstants["PI"] = F_PI; +	mConstants["TWO_PI"] = F_TWO_PI; +	mConstants["PI_BY_TWO"] = F_PI_BY_TWO; +	mConstants["SQRT_TWO_PI"] = F_SQRT_TWO_PI; +	mConstants["SQRT2"] = F_SQRT2; +	mConstants["SQRT3"] = F_SQRT3; +	mConstants["DEG_TO_RAD"] = DEG_TO_RAD; +	mConstants["RAD_TO_DEG"] = RAD_TO_DEG; +	mConstants["GRAVITY"] = GRAVITY; +} + +LLCalc::~LLCalc() +{ +} + +//static +void LLCalc::cleanUp() +{ +	delete sInstance; +	sInstance = NULL; +} + +//static +LLCalc* LLCalc::getInstance() +{ +    if (!sInstance)	sInstance = new LLCalc(); +	return sInstance; +} + +void LLCalc::setVar(const std::string& name, const F32& value) +{ +	mVariables[name] = value; +} + +void LLCalc::clearVar(const std::string& name) +{ +	mVariables.erase(name); +} + +void LLCalc::clearAllVariables() +{ +	mVariables.clear(); +} + +/* +void LLCalc::updateVariables(LLSD& vars) +{ +	LLSD::map_iterator cIt = vars.beginMap(); +	for(; cIt != vars.endMap(); cIt++) +	{ +		setVar(cIt->first, (F32)(LLSD::Real)cIt->second); +	} +} +*/ + +bool LLCalc::evalString(const std::string& expression, F32& result) +{ +	
std::string expr_upper = expression; +	LLStringUtil::toUpper(expr_upper); +	 +	LLCalcParser calc(result, &mConstants, &mVariables); + +	mLastErrorPos = 0; +	std::string::iterator start = expr_upper.begin(); + 	parse_info<std::string::iterator> info; +	 +	try +	{ +		info = parse(start, expr_upper.end(), calc, space_p); +		lldebugs << "Math expression: " << expression << " = " << result << llendl; +	} +	catch(parser_error<std::string, std::string::iterator> &e) +	{ +		mLastErrorPos = e.where - expr_upper.begin(); +		 +		llinfos << "Calc parser exception: " << e.descriptor << " at " << mLastErrorPos << " in expression: " << expression << llendl; +		return false; +	} +	 +	if (!info.full) +	{ +		mLastErrorPos = info.stop - expr_upper.begin(); +		llinfos << "Unhandled syntax error at " << mLastErrorPos << " in expression: " << expression << llendl; +		return false; +	} +	 +	return true; +} diff --git a/indra/llmath/llcalc.h b/indra/llmath/llcalc.h new file mode 100644 index 0000000000..ceb9dce585 --- /dev/null +++ b/indra/llmath/llcalc.h @@ -0,0 +1,100 @@ +/* + *  LLCalc.h + *  Copyright 2008 Aimee Walton. + * $LicenseInfo:firstyear=2008&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2008, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + * + */ + +#ifndef LL_CALC_H +#define LL_CALC_H + +#include <map> +#include <string> + +class LLCalc +{ +public: +	LLCalc(); +	~LLCalc(); + +	// Variable name constants +	static const char* X_POS; +	static const char* Y_POS; +	static const char* Z_POS; +	static const char* X_SCALE; +	static const char* Y_SCALE; +	static const char* Z_SCALE; +	static const char* X_ROT; +	static const char* Y_ROT; +	static const char* Z_ROT; +	static const char* HOLLOW; +	static const char* CUT_BEGIN; +	static const char* CUT_END; +	static const char* PATH_BEGIN; +	static const char* PATH_END; +	static const char* TWIST_BEGIN; +	static const char* TWIST_END; +	static const char* X_SHEAR; +	static const char* Y_SHEAR; +	static const char* X_TAPER; +	static const char* Y_TAPER; +	static const char* RADIUS_OFFSET; +	static const char* REVOLUTIONS; +	static const char* SKEW; +	static const char* X_HOLE; +	static const char* Y_HOLE; +	static const char* TEX_U_SCALE; +	static const char* TEX_V_SCALE; +	static const char* TEX_U_OFFSET; +	static const char* TEX_V_OFFSET; +	static const char* TEX_ROTATION; +	static const char* TEX_TRANSPARENCY; +	static const char* TEX_GLOW; + +	void	setVar(const std::string& name, const F32& value); +	void	clearVar(const std::string& name); +	void	clearAllVariables(); +//	void	updateVariables(LLSD& vars); + +	bool	evalString(const std::string& expression, F32& result); +	std::string::size_type	getLastErrorPos()	{ return mLastErrorPos; } +	 +	static 
LLCalc* getInstance(); +	static void cleanUp(); + +	typedef	std::map<std::string, F32> calc_map_t; +	 +private: +	std::string::size_type	mLastErrorPos; +	 +	calc_map_t	mConstants; +	calc_map_t	mVariables; +	 +	// *TODO: Add support for storing user defined variables, and stored functions. +	//	Will need UI work, and a means to save them between sessions. +//	calc_map_t mUserVariables; +	 +	// "There shall be only one" +	static LLCalc*	sInstance; +}; + +#endif // LL_CALC_H diff --git a/indra/llmath/llcalcparser.cpp b/indra/llmath/llcalcparser.cpp new file mode 100644 index 0000000000..b4ca320659 --- /dev/null +++ b/indra/llmath/llcalcparser.cpp @@ -0,0 +1,63 @@ +/* + *  LLCalcParser.cpp + *  Copyright 2008 Aimee Walton. + * $LicenseInfo:firstyear=2008&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2008, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + * + */ + +#include "linden_common.h" + +#include "llcalcparser.h" +using namespace boost::spirit::classic; + +F32 LLCalcParser::lookup(const std::string::iterator& start, const std::string::iterator& end) const +{ +	LLCalc::calc_map_t::iterator iter; + +	std::string name(start, end); +	 +	if (mConstants) +	{ +		iter = mConstants->find(name); +		if (iter != mConstants->end()) +		{ +			return (*iter).second; +		} +	} +	else +	{ +		// This should never happen! +		throw_(end, std::string("Missing constants table")); +	} +	 +	if (mVariables) +	{ +		iter = mVariables->find(name); +		if (iter != mVariables->end()) +		{ +			return (*iter).second; +		} +	} +	 +	throw_(end, std::string("Unknown symbol " + name)); +	return 0.f; +} diff --git a/indra/llmath/llcalcparser.h b/indra/llmath/llcalcparser.h new file mode 100644 index 0000000000..e0ad270266 --- /dev/null +++ b/indra/llmath/llcalcparser.h @@ -0,0 +1,191 @@ +/* + *  LLCalcParser.h + *  Copyright 2008 Aimee Walton. + * $LicenseInfo:firstyear=2008&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2008, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + * + */ + +#ifndef LL_CALCPARSER_H +#define LL_CALCPARSER_H + +#include <boost/spirit/include/classic_attribute.hpp> +#include <boost/spirit/include/classic_core.hpp> +#include <boost/spirit/include/classic_error_handling.hpp> +#include <boost/spirit/include/classic_position_iterator.hpp> +#include <boost/spirit/include/phoenix1_binders.hpp> +#include <boost/spirit/include/classic_symbols.hpp> +using namespace boost::spirit::classic; + +#include "llcalc.h" +#include "llmath.h" + +struct LLCalcParser : grammar<LLCalcParser> +{ +	LLCalcParser(F32& result, LLCalc::calc_map_t* constants, LLCalc::calc_map_t* vars) : +		mResult(result), mConstants(constants), mVariables(vars) {}; +	 +	struct value_closure : closure<value_closure, F32> +	{ +		member1 value; +	}; +	 +	template <typename ScannerT> +	struct definition +	{ +		// Rule declarations +		rule<ScannerT> statement, identifier; +		rule<ScannerT, value_closure::context_t> expression, term, +			power,  +			unary_expr,  +			factor,  +			unary_func,  +			binary_func, +			group; + +		// start() should return the starting symbol +		rule<ScannerT> const& start() const { return statement; } +		 +		definition(LLCalcParser const& self) +		{ +			using namespace phoenix; +			 +			assertion<std::string> assert_domain("Domain error"); +//			assertion<std::string> assert_symbol("Unknown symbol"); +			assertion<std::string> assert_syntax("Syntax error"); +			 +			identifier = +				lexeme_d[(alpha_p | '_') >> *(alnum_p | '_')] +			; +			 +			group = +				'(' >> expression[group.value = arg1] >> assert_syntax(ch_p(')')) +			; + +			unary_func = +				((str_p("SIN") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_sin)(self,arg1)]) | +				 (str_p("COS") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_cos)(self,arg1)]) | +				 (str_p("TAN") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_tan)(self,arg1)]) | +				 (str_p("ASIN") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_asin)(self,arg1)]) | +				 (str_p("ACOS") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_acos)(self,arg1)]) | +				 (str_p("ATAN") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_atan)(self,arg1)]) | +				 (str_p("SQRT") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_sqrt)(self,arg1)]) | +				 (str_p("LOG") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_log)(self,arg1)]) | +				 (str_p("EXP") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_exp)(self,arg1)]) | +				 (str_p("ABS") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_fabs)(self,arg1)]) | +				 (str_p("FLR") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_floor)(self,arg1)]) | +				 (str_p("CEIL") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_ceil)(self,arg1)]) +				) >> assert_syntax(ch_p(')')) +			; +			 +			binary_func = +				((str_p("ATAN2") >> '(' >> expression[binary_func.value = arg1] >> ',' >> +				  expression[binary_func.value = bind(&LLCalcParser::_atan2)(self, binary_func.value, arg1)]) | +				 (str_p("MIN") >> '(' >> expression[binary_func.value = arg1] >> ',' >>  +				  expression[binary_func.value = bind(&LLCalcParser::_min)(self, 
binary_func.value, arg1)]) | +				 (str_p("MAX") >> '(' >> expression[binary_func.value = arg1] >> ',' >>  +				  expression[binary_func.value = bind(&LLCalcParser::_max)(self, binary_func.value, arg1)]) +				) >> assert_syntax(ch_p(')')) +			; +			 +			// *TODO: Localisation of the decimal point? +			// Problem, LLLineEditor::postvalidateFloat accepts a comma when appropriate +			// for the current locale. However to do that here could clash with using +			// the comma as a separator when passing arguments to functions. +			factor = +				(ureal_p[factor.value = arg1] | +				 group[factor.value = arg1] | +				 unary_func[factor.value = arg1] | +				 binary_func[factor.value = arg1] | +				 // Lookup throws an Unknown Symbol error if it is unknown, while this works fine, +				 // would be "neater" to handle symbol lookup from here with an assertive parser. +//				 constants_p[factor.value = arg1]| +				 identifier[factor.value = bind(&LLCalcParser::lookup)(self, arg1, arg2)] +				) >> +				// Detect and throw math errors. +				assert_domain(eps_p(bind(&LLCalcParser::checkNaN)(self, factor.value))) +			; + +			unary_expr = +				!ch_p('+') >> factor[unary_expr.value = arg1] | +				'-' >> factor[unary_expr.value = -arg1] +			; +			 +			power = +				unary_expr[power.value = arg1] >> +				*('^' >> assert_syntax(unary_expr[power.value = bind(&powf)(power.value, arg1)])) +			; +			 +			term = +				power[term.value = arg1] >> +				*(('*' >> assert_syntax(power[term.value *= arg1])) | +				  ('/' >> assert_syntax(power[term.value /= arg1])) | +				  ('%' >> assert_syntax(power[term.value = bind(&fmodf)(term.value, arg1)])) +				) +			; +			 +			expression = +				assert_syntax(term[expression.value = arg1]) >> +				*(('+' >> assert_syntax(term[expression.value += arg1])) | +				  ('-' >> assert_syntax(term[expression.value -= arg1])) +				) +			; + +			statement = +				!ch_p('=') >> ( expression )[var(self.mResult) = arg1] >> (end_p) +			; +		} +	}; +	 +private: +	// Member functions for semantic actions +	F32	lookup(const std::string::iterator&, const std::string::iterator&) const; +	F32 _min(const F32& a, const F32& b) const { return llmin(a, b); } +	F32 _max(const F32& a, const F32& b) const { return llmax(a, b); } +	 +	bool checkNaN(const F32& a) const { return !llisnan(a); } +	 +	//FIX* non ambigious function fix making SIN() work for calc -Cryogenic Blitz +	F32 _sin(const F32& a) const { return sin(DEG_TO_RAD * a); } +	F32 _cos(const F32& a) const { return cos(DEG_TO_RAD * a); } +	F32 _tan(const F32& a) const { return tan(DEG_TO_RAD * a); } +	F32 _asin(const F32& a) const { return asin(a * RAD_TO_DEG); } +	F32 _acos(const F32& a) const { return acos(a * RAD_TO_DEG); } +	F32 _atan(const F32& a) const { return atan(a * RAD_TO_DEG); } +	F32 _sqrt(const F32& a) const { return sqrt(a); } +	F32 _log(const F32& a) const { return log(a); } +	F32 _exp(const F32& a) const { return exp(a); } +	F32 _fabs(const F32& a) const { return fabs(a); } +	F32 _floor(const F32& a) const { return (F32)llfloor(a); } +	F32 _ceil(const F32& a) const { return llceil(a); } + +	F32 _atan2(const F32& a,const F32& b) const { return atan2(a,b); } + + + +	LLCalc::calc_map_t* mConstants; +	LLCalc::calc_map_t* mVariables; +//	LLCalc::calc_map_t* mUserVariables; +	 +	F32&		mResult; +}; + +#endif // LL_CALCPARSER_H diff --git a/indra/llmath/llcamera.cpp b/indra/llmath/llcamera.cpp index bad4d00fd6..22ba26f99b 100644 --- a/indra/llmath/llcamera.cpp +++ b/indra/llmath/llcamera.cpp @@ -45,7 +45,6 @@ LLCamera::LLCamera() :  	
calculateFrustumPlanes();  }  -  LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_pixels, F32 near_plane, F32 far_plane) :  	LLCoordFrame(),  	mViewHeightInPixels(view_height_in_pixels), @@ -61,6 +60,10 @@ LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_p  	setView(vertical_fov_rads);  }  +LLCamera::~LLCamera() +{ + +}  // ---------------- LLCamera::getFoo() member functions ---------------- @@ -82,11 +85,11 @@ F32 LLCamera::getMaxView() const  // ---------------- LLCamera::setFoo() member functions ---------------- -void LLCamera::setUserClipPlane(LLPlane plane) +void LLCamera::setUserClipPlane(LLPlane& plane)  {  	mPlaneCount = 7; -	mAgentPlanes[6].p = plane; -	mAgentPlanes[6].mask = calcPlaneMask(plane); +	mAgentPlanes[6] = plane; +	mPlaneMask[6] = plane.calcPlaneMask();  }  void LLCamera::disableUserClipPlane() @@ -158,166 +161,91 @@ size_t LLCamera::readFrustumFromBuffer(const char *buffer)  // ---------------- test methods  ----------------  -S32 LLCamera::AABBInFrustum(const LLVector3 ¢er, const LLVector3& radius)  +S32 LLCamera::AABBInFrustum(const LLVector4a ¢er, const LLVector4a& radius)   { -	static const LLVector3 scaler[] = { -		LLVector3(-1,-1,-1), -		LLVector3( 1,-1,-1), -		LLVector3(-1, 1,-1), -		LLVector3( 1, 1,-1), -		LLVector3(-1,-1, 1), -		LLVector3( 1,-1, 1), -		LLVector3(-1, 1, 1), -		LLVector3( 1, 1, 1) +	static const LLVector4a scaler[] = { +		LLVector4a(-1,-1,-1), +		LLVector4a( 1,-1,-1), +		LLVector4a(-1, 1,-1), +		LLVector4a( 1, 1,-1), +		LLVector4a(-1,-1, 1), +		LLVector4a( 1,-1, 1), +		LLVector4a(-1, 1, 1), +		LLVector4a( 1, 1, 1)  	};  	U8 mask = 0; -	S32 result = 2; - -	/*if (mFrustumCornerDist > 0.f && radius.magVecSquared() > mFrustumCornerDist * mFrustumCornerDist) -	{ //box is larger than frustum, check frustum quads against box planes - -		static const LLVector3 dir[] =  -		{ -			LLVector3(1, 0, 0), -			LLVector3(-1, 0, 0), -			LLVector3(0, 1, 0), -			LLVector3(0, -1, 0), -			LLVector3(0, 0, 1), -			LLVector3(0, 0, -1) -		}; - -		U32 quads[] =  -		{ -			0, 1, 2, 3, -			0, 1, 5, 4, -			2, 3, 7, 6, -			3, 0, 7, 4, -			1, 2, 6, 4, -			4, 5, 6, 7 -		}; - -		result = 0; - -		BOOL total_inside = TRUE; -		for (U32 i = 0; i < 6; i++) -		{  -			LLVector3 p = center + radius.scaledVec(dir[i]); -			F32 d = -p*dir[i]; - -			for (U32 j = 0; j <	6; j++) -			{ //for each quad -				F32 dist = mAgentFrustum[quads[j*4+0]]*dir[i] + d; -				if (dist > 0) -				{ //at least one frustum point is outside the AABB -					total_inside = FALSE; -					for (U32 k = 1; k < 4; k++) -					{ //for each other point on quad -						if ( mAgentFrustum[quads[j*4+k]]*dir[i]+d  <= 0.f) -						{ //quad is straddling some plane of AABB -							return 1; -						} -					} -				} -				else -				{ -					for (U32 k = 1; k < 4; k++) -					{ -						if (mAgentFrustum[quads[j*4+k]]*dir[i]+d > 0.f) -						{ -							return 1; -						} -					} -				} -			} -		} - -		if (total_inside) -		{ -			result = 1; -		} -	} -	else*/ +	bool result = false; +	LLVector4a rscale, maxp, minp; +	LLSimdScalar d; +	for (U32 i = 0; i < mPlaneCount; i++)  	{ -		for (U32 i = 0; i < mPlaneCount; i++) +		mask = mPlaneMask[i]; +		if (mask != 0xff)  		{ -			mask = mAgentPlanes[i].mask; -			if (mask == 0xff) -			{ -				continue; -			} -			LLPlane p = mAgentPlanes[i].p; -			LLVector3 n = LLVector3(p); -			float d = p.mV[3]; -			LLVector3 rscale = radius.scaledVec(scaler[mask]); - -			LLVector3 minp = center - rscale; -			LLVector3 maxp = center + rscale; - -			if (n * minp > 
-d)  +			const LLPlane& p(mAgentPlanes[i]); +			p.getAt<3>(d); +			rscale.setMul(radius, scaler[mask]); +			minp.setSub(center, rscale); +			d = -d; +			if (p.dot3(minp).getF32() > d)   			{  				return 0;  			} -		 -			if (n * maxp > -d) +			 +			if(!result)  			{ -				result = 1; +				maxp.setAdd(center, rscale); +				result = (p.dot3(maxp).getF32() > d);  			}  		}  	} -	 -	return result; +	return result?1:2;  } -S32 LLCamera::AABBInFrustumNoFarClip(const LLVector3 ¢er, const LLVector3& radius)  + +S32 LLCamera::AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius)   { -	static const LLVector3 scaler[] = { -		LLVector3(-1,-1,-1), -		LLVector3( 1,-1,-1), -		LLVector3(-1, 1,-1), -		LLVector3( 1, 1,-1), -		LLVector3(-1,-1, 1), -		LLVector3( 1,-1, 1), -		LLVector3(-1, 1, 1), -		LLVector3( 1, 1, 1) +	static const LLVector4a scaler[] = { +		LLVector4a(-1,-1,-1), +		LLVector4a( 1,-1,-1), +		LLVector4a(-1, 1,-1), +		LLVector4a( 1, 1,-1), +		LLVector4a(-1,-1, 1), +		LLVector4a( 1,-1, 1), +		LLVector4a(-1, 1, 1), +		LLVector4a( 1, 1, 1)  	};  	U8 mask = 0; -	S32 result = 2; - +	bool result = false; +	LLVector4a rscale, maxp, minp; +	LLSimdScalar d;  	for (U32 i = 0; i < mPlaneCount; i++)  	{ -		if (i == 5) -		{ -			continue; -		} - -		mask = mAgentPlanes[i].mask; -		if (mask == 0xff) -		{ -			continue; -		} -		LLPlane p = mAgentPlanes[i].p; -		LLVector3 n = LLVector3(p); -		float d = p.mV[3]; -		LLVector3 rscale = radius.scaledVec(scaler[mask]); - -		LLVector3 minp = center - rscale; -		LLVector3 maxp = center + rscale; - -		if (n * minp > -d)  +		mask = mPlaneMask[i]; +		if ((i != 5) && (mask != 0xff))  		{ -			return 0; -		} -	 -		if (n * maxp > -d) -		{ -			result = 1; +			const LLPlane& p(mAgentPlanes[i]); +			p.getAt<3>(d); +			rscale.setMul(radius, scaler[mask]); +			minp.setSub(center, rscale); +			d = -d; +			if (p.dot3(minp).getF32() > d)  +			{ +				return 0; +			} +			 +			if(!result) +			{ +				maxp.setAdd(center, rscale); +				result = (p.dot3(maxp).getF32() > d); +			}  		}  	} -	return result; +	return result?1:2;  }  int LLCamera::sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius)  @@ -438,28 +366,22 @@ int LLCamera::sphereInFrustumOld(const LLVector3 &sphere_center, const F32 radiu  int LLCamera::sphereInFrustum(const LLVector3 &sphere_center, const F32 radius) const   {  	// Returns 1 if sphere is in frustum, 0 if not. 
-	int res = 2; +	bool res = false;  	for (int i = 0; i < 6; i++)  	{ -		if (mAgentPlanes[i].mask == 0xff) +		if (mPlaneMask[i] != 0xff)  		{ -			continue; -		} - -		float d = mAgentPlanes[i].p.dist(sphere_center); +			float d = mAgentPlanes[i].dist(sphere_center); -		if (d > radius)  -		{ -			return 0; -		} - -		if (d > -radius) -		{ -			res = 1; +			if (d > radius)  +			{ +				return 0; +			} +			res = res || (d > -radius);  		}  	} -	return res; +	return res?1:2;  } @@ -611,25 +533,6 @@ LLPlane planeFromPoints(LLVector3 p1, LLVector3 p2, LLVector3 p3)  	return LLPlane(p1, n);  } -U8 LLCamera::calcPlaneMask(const LLPlane& plane) -{ -	U8 mask = 0; -	 -	if (plane.mV[0] >= 0) -	{ -		mask |= 1; -	} -	if (plane.mV[1] >= 0) -	{ -		mask |= 2; -	} -	if (plane.mV[2] >= 0) -	{ -		mask |= 4; -	} - -	return mask; -}  void LLCamera::ignoreAgentFrustumPlane(S32 idx)  { @@ -638,12 +541,13 @@ void LLCamera::ignoreAgentFrustumPlane(S32 idx)  		return;  	} -	mAgentPlanes[idx].mask = 0xff; -	mAgentPlanes[idx].p.clearVec(); +	mPlaneMask[idx] = 0xff; +	mAgentPlanes[idx].clear();  }  void LLCamera::calcAgentFrustumPlanes(LLVector3* frust)  { +	  	for (int i = 0; i < 8; i++)  	{  		mAgentFrustum[i] = frust[i]; @@ -656,27 +560,27 @@ void LLCamera::calcAgentFrustumPlanes(LLVector3* frust)  	//order of planes is important, keep most likely to fail in the front of the list  	//near - frust[0], frust[1], frust[2] -	mAgentPlanes[2].p = planeFromPoints(frust[0], frust[1], frust[2]); +	mAgentPlanes[2] = planeFromPoints(frust[0], frust[1], frust[2]);  	//far   -	mAgentPlanes[5].p = planeFromPoints(frust[5], frust[4], frust[6]); +	mAgentPlanes[5] = planeFromPoints(frust[5], frust[4], frust[6]);  	//left   -	mAgentPlanes[0].p = planeFromPoints(frust[4], frust[0], frust[7]); +	mAgentPlanes[0] = planeFromPoints(frust[4], frust[0], frust[7]);  	//right   -	mAgentPlanes[1].p = planeFromPoints(frust[1], frust[5], frust[6]); +	mAgentPlanes[1] = planeFromPoints(frust[1], frust[5], frust[6]);  	//top   -	mAgentPlanes[4].p = planeFromPoints(frust[3], frust[2], frust[6]); +	mAgentPlanes[4] = planeFromPoints(frust[3], frust[2], frust[6]);  	//bottom   -	mAgentPlanes[3].p = planeFromPoints(frust[1], frust[0], frust[4]); +	mAgentPlanes[3] = planeFromPoints(frust[1], frust[0], frust[4]);  	//cache plane octant facing mask for use in AABBInFrustum  	for (U32 i = 0; i < mPlaneCount; i++)  	{ -		mAgentPlanes[i].mask = calcPlaneMask(mAgentPlanes[i].p); +		mPlaneMask[i] = mAgentPlanes[i].calcPlaneMask();  	}  } @@ -703,6 +607,7 @@ void LLCamera::calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom)  	mLocalPlanes[PLANE_BOTTOM].setVec( a, c, b);   	//calculate center and radius squared of frustum in world absolute coordinates +	static LLVector3 const X_AXIS(1.f, 0.f, 0.f);  	mFrustCenter = X_AXIS*mFarPlane*0.5f;  	mFrustCenter = transformToAbsolute(mFrustCenter);  	mFrustRadiusSquared = mFarPlane*0.5f; @@ -729,9 +634,10 @@ void LLCamera::calculateWorldFrustumPlanes()  	F32 d;  	LLVector3 center = mOrigin - mXAxis*mNearPlane;  	mWorldPlanePos = center; +	LLVector3 pnorm;	  	for (int p=0; p<4; p++)  	{ -		LLVector3 pnorm = LLVector3(mLocalPlanes[p]); +		mLocalPlanes[p].getVector3(pnorm);  		LLVector3 norm = rotateToAbsolute(pnorm);  		norm.normVec();  		d = -(center * norm); @@ -741,13 +647,15 @@ void LLCamera::calculateWorldFrustumPlanes()  	LLVector3 zaxis(0, 0, 1.0f);  	F32 yaw = getYaw();  	{ -		LLVector3 tnorm = LLVector3(mLocalPlanes[PLANE_LEFT]); +		LLVector3 tnorm; +		mLocalPlanes[PLANE_LEFT].getVector3(tnorm);  		
tnorm.rotVec(yaw, zaxis);  		d = -(mOrigin * tnorm);  		mHorizPlanes[HORIZ_PLANE_LEFT] = LLPlane(tnorm, d);  	}  	{ -		LLVector3 tnorm = LLVector3(mLocalPlanes[PLANE_RIGHT]); +		LLVector3 tnorm; +		mLocalPlanes[PLANE_RIGHT].getVector3(tnorm);  		tnorm.rotVec(yaw, zaxis);  		d = -(mOrigin * tnorm);  		mHorizPlanes[HORIZ_PLANE_RIGHT] = LLPlane(tnorm, d); diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index 922d6f9fac..ec67b91d05 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -31,6 +31,7 @@  #include "llmath.h"  #include "llcoordframe.h"  #include "llplane.h" +#include "llvector4a.h"  const F32 DEFAULT_FIELD_OF_VIEW 	= 60.f * DEG_TO_RAD;  const F32 DEFAULT_ASPECT_RATIO 		= 640.f / 480.f; @@ -50,15 +51,6 @@ const F32 MIN_FAR_PLANE 	= 0.2f;  static const F32 MIN_FIELD_OF_VIEW = 5.0f * DEG_TO_RAD;  static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; -static const LLVector3 X_AXIS(1.f,0.f,0.f); -static const LLVector3 Y_AXIS(0.f,1.f,0.f); -static const LLVector3 Z_AXIS(0.f,0.f,1.f); - -static const LLVector3 NEG_X_AXIS(-1.f,0.f,0.f); -static const LLVector3 NEG_Y_AXIS(0.f,-1.f,0.f); -static const LLVector3 NEG_Z_AXIS(0.f,0.f,-1.f); - -  // An LLCamera is an LLCoorFrame with a view frustum.  // This means that it has several methods for moving it around   // that are inherited from the LLCoordFrame() class : @@ -73,6 +65,12 @@ class LLCamera  : 	public LLCoordFrame  {  public: +	 +	LLCamera(const LLCamera& rhs) +	{ +		*this = rhs; +	} +	  	enum {  		PLANE_LEFT = 0,  		PLANE_RIGHT = 1, @@ -110,6 +108,9 @@ public:  	};  private: +	LLPlane mAgentPlanes[7];  //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP +	U8 mPlaneMask[8];         // 8 for alignment	 +	  	F32 mView;					// angle between top and bottom frustum planes in radians.  	
F32 mAspect;				// width/height  	S32 mViewHeightInPixels;	// for ViewHeightInPixels() only @@ -123,30 +124,22 @@ private:  	LLPlane mWorldPlanes[PLANE_NUM];  	LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; -	struct frustum_plane -	{ -		frustum_plane() : mask(0) {} -		LLPlane p; -		U8 mask; -	}; -	frustum_plane mAgentPlanes[7];  //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP -  	U32 mPlaneCount;  //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in  	LLVector3 mWorldPlanePos;		// Position of World Planes (may be offset from camera)  public:  	LLVector3 mAgentFrustum[8];  //8 corners of 6-plane frustum  	F32	mFrustumCornerDist;		//distance to corner of frustum against far clip plane -	LLPlane getAgentPlane(U32 idx) { return mAgentPlanes[idx].p; } +	LLPlane& getAgentPlane(U32 idx) { return mAgentPlanes[idx]; }  public:  	LLCamera();  	LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_pixels, F32 near_plane, F32 far_plane); -	virtual ~LLCamera(){} // no-op virtual destructor +	virtual ~LLCamera(); +	 -	void setUserClipPlane(LLPlane plane); +	void setUserClipPlane(LLPlane& plane);  	void disableUserClipPlane(); -	U8 calcPlaneMask(const LLPlane& plane);  	virtual void setView(F32 vertical_fov_rads);  	void setViewHeightInPixels(S32 height);  	void setAspect(F32 new_aspect); @@ -193,8 +186,8 @@ public:  	S32 sphereInFrustum(const LLVector3 ¢er, const F32 radius) const;  	S32 pointInFrustum(const LLVector3 &point) const { return sphereInFrustum(point, 0.0f); }  	S32 sphereInFrustumFull(const LLVector3 ¢er, const F32 radius) const { return sphereInFrustum(center, radius); } -	S32 AABBInFrustum(const LLVector3 ¢er, const LLVector3& radius); -	S32 AABBInFrustumNoFarClip(const LLVector3 ¢er, const LLVector3& radius); +	S32 AABBInFrustum(const LLVector4a& center, const LLVector4a& radius); +	S32 AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius);  	//does a quick 'n dirty sphere-sphere check  	S32 sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius);  diff --git a/indra/llmath/llcoord.h b/indra/llmath/llcoord.h index 706ad92787..9b76268afd 100644 --- a/indra/llmath/llcoord.h +++ b/indra/llmath/llcoord.h @@ -26,80 +26,87 @@  #ifndef LL_LLCOORD_H  #define LL_LLCOORD_H +template<typename> class LLCoord; +struct LL_COORD_TYPE_GL; +struct LL_COORD_TYPE_WINDOW; +struct LL_COORD_TYPE_SCREEN; + +typedef LLCoord<LL_COORD_TYPE_GL> LLCoordGL; +typedef LLCoord<LL_COORD_TYPE_WINDOW> LLCoordWindow; +typedef LLCoord<LL_COORD_TYPE_SCREEN> LLCoordScreen; + +struct LLCoordCommon +{ +	LLCoordCommon(S32 x, S32 y) : mX(x), mY(y) {} +	LLCoordCommon() : mX(0), mY(0) {} +	S32 mX; +	S32 mY; +}; +  // A two-dimensional pixel value -class LLCoord +template<typename COORD_FRAME> +class LLCoord : protected COORD_FRAME  {  public: -	S32		mX; -	S32		mY; +	typedef LLCoord<COORD_FRAME> self_t; +	typename COORD_FRAME::value_t	mX; +	typename COORD_FRAME::value_t	mY;  	LLCoord():	mX(0), mY(0)  	{} -	LLCoord(S32 x, S32 y): mX(x), mY(y) -	{} -	virtual ~LLCoord() +	LLCoord(typename COORD_FRAME::value_t x, typename COORD_FRAME::value_t y): mX(x), mY(y)  	{} -	virtual void set(S32 x, S32 y)		{ mX = x; mY = y; } -}; +	LLCoord(const LLCoordCommon& other) +	{ +		COORD_FRAME::convertFromCommon(other); +	} +	LLCoordCommon convert() const +	{ +		return COORD_FRAME::convertToCommon(); +	} -// GL coordinates start in the client region of a window, -// with left, bottom = 0, 0 -class LLCoordGL : public LLCoord -{ -public: -	LLCoordGL() 
: LLCoord() -	{} -	LLCoordGL(S32 x, S32 y) : LLCoord(x, y) -	{} -	bool operator==(const LLCoordGL& other) const { return mX == other.mX && mY == other.mY; } -	bool operator!=(const LLCoordGL& other) const { return !(*this == other); } -}; +	void set(typename COORD_FRAME::value_t x, typename COORD_FRAME::value_t y) { mX = x; mY = y;} +	bool operator==(const self_t& other) const { return mX == other.mX && mY == other.mY; } +	bool operator!=(const self_t& other) const { return !(*this == other); } -//bool operator ==(const LLCoordGL& a, const LLCoordGL& b); +	static const self_t& getTypedCoords(const COORD_FRAME& self) { return static_cast<const self_t&>(self); } +	static self_t& getTypedCoords(COORD_FRAME& self) { return static_cast<self_t&>(self); } +}; -// Window coords include things like window borders, -// menu regions, etc. -class LLCoordWindow : public LLCoord +struct LL_COORD_TYPE_GL   { -public: -	LLCoordWindow() : LLCoord() -	{} -	LLCoordWindow(S32 x, S32 y) : LLCoord(x, y) -	{} -	bool operator==(const LLCoordWindow& other) const { return mX == other.mX && mY == other.mY; } -	bool operator!=(const LLCoordWindow& other) const { return !(*this == other); } -}; +	typedef S32 value_t; +	LLCoordCommon convertToCommon() const +	{ +		const LLCoordGL& self = LLCoordGL::getTypedCoords(*this); +		return LLCoordCommon(self.mX, self.mY); +	} -// Screen coords start at left, top = 0, 0 -class LLCoordScreen : public LLCoord +	void convertFromCommon(const LLCoordCommon& from) +	{ +		LLCoordGL& self = LLCoordGL::getTypedCoords(*this); +		self.mX = from.mX; +		self.mY = from.mY; +	} +}; + +struct LL_COORD_TYPE_WINDOW   { -public: -	LLCoordScreen() : LLCoord() -	{} -	LLCoordScreen(S32 x, S32 y) : LLCoord(x, y) -	{} -	bool operator==(const LLCoordScreen& other) const { return mX == other.mX && mY == other.mY; } -	bool operator!=(const LLCoordScreen& other) const { return !(*this == other); } +	typedef S32 value_t; + +	LLCoordCommon convertToCommon() const; +	void convertFromCommon(const LLCoordCommon& from);  }; -class LLCoordFont : public LLCoord +struct LL_COORD_TYPE_SCREEN   { -public: -	F32 mZ; -	 -	LLCoordFont() : LLCoord(), mZ(0.f) -	{} -	LLCoordFont(S32 x, S32 y, F32 z = 0) : LLCoord(x,y), mZ(z) -	{} -	 -	void set(S32 x, S32 y) { LLCoord::set(x,y); mZ = 0.f; } -	void set(S32 x, S32 y, F32 z) { mX = x; mY = y; mZ = z; } -	bool operator==(const LLCoordFont& other) const { return mX == other.mX && mY == other.mY; } -	bool operator!=(const LLCoordFont& other) const { return !(*this == other); } +	typedef S32 value_t; + +	LLCoordCommon convertToCommon() const; +	void convertFromCommon(const LLCoordCommon& from);  }; -	  #endif diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h index b86768456d..b93f89d674 100644 --- a/indra/llmath/llmath.h +++ b/indra/llmath/llmath.h @@ -29,7 +29,7 @@  #include <cmath>  #include <cstdlib> -#include <complex> +#include <vector>  #include "lldefs.h"  //#include "llstl.h" // *TODO: Remove when LLString is gone  //#include "llstring.h" // *TODO: Remove when LLString is gone @@ -55,32 +55,11 @@  #endif  // Single Precision Floating Point Routines -#ifndef sqrtf -#define sqrtf(x)	((F32)sqrt((F64)(x))) -#endif -#ifndef fsqrtf -#define fsqrtf(x)	sqrtf(x) -#endif - -#ifndef cosf -#define cosf(x)		((F32)cos((F64)(x))) -#endif -#ifndef sinf -#define sinf(x)		((F32)sin((F64)(x))) -#endif -#ifndef tanf +// (There used to be more defined here, but they appeared to be redundant and  +// were breaking some other includes. 
Removed by Falcon, reviewed by Andrew, 11/25/09) +/*#ifndef tanf  #define tanf(x)		((F32)tan((F64)(x))) -#endif -#ifndef acosf -#define acosf(x)	((F32)acos((F64)(x))) -#endif - -#ifndef powf -#define powf(x,y)	((F32)pow((F64)(x),(F64)(y))) -#endif -#ifndef expf -#define expf(x)		((F32)exp((F64)(x))) -#endif +#endif*/  const F32	GRAVITY			= -9.8f; @@ -200,7 +179,7 @@ inline S32 llfloor( F32 f )  		}  		return result;  #else -		return (S32)floorf(f); +		return (S32)floor(f);  #endif  } @@ -378,11 +357,14 @@ inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)  		bar *= 10.f;  	} -	foo = (F32)llround(foo * bar); +	//F32 new_foo = (F32)llround(foo * bar); +	// the llround() implementation sucks.  Don't us it. + +	F32 sign = (foo > 0.f) ? 1.f : -1.f; +	F32 new_foo = F32( S64(foo * bar + sign * 0.5f)); +	new_foo /= bar; -	// shift back -	foo /= bar; -	return foo; +	return new_foo;  }  inline F32 lerp(F32 a, F32 b, F32 u)  @@ -516,4 +498,52 @@ inline F32 llgaussian(F32 x, F32 o)  	return 1.f/(F_SQRT_TWO_PI*o)*powf(F_E, -(x*x)/(2*o*o));  } +//helper function for removing outliers +template <class VEC_TYPE> +inline void ll_remove_outliers(std::vector<VEC_TYPE>& data, F32 k) +{ +	if (data.size() < 100) +	{ //not enough samples +		return; +	} + +	VEC_TYPE Q1 = data[data.size()/4]; +	VEC_TYPE Q3 = data[data.size()-data.size()/4-1]; + +	if ((F32)(Q3-Q1) < 1.f) +	{ +		// not enough variation to detect outliers +		return; +	} + + +	VEC_TYPE min = (VEC_TYPE) ((F32) Q1-k * (F32) (Q3-Q1)); +	VEC_TYPE max = (VEC_TYPE) ((F32) Q3+k * (F32) (Q3-Q1)); + +	U32 i = 0; +	while (i < data.size() && data[i] < min) +	{ +		i++; +	} + +	S32 j = data.size()-1; +	while (j > 0 && data[j] > max) +	{ +		j--; +	} + +	if (j < data.size()-1) +	{ +		data.erase(data.begin()+j, data.end()); +	} + +	if (i > 0) +	{ +		data.erase(data.begin(), data.begin()+i); +	} +} + +// Include simd math header +#include "llsimdmath.h" +  #endif diff --git a/indra/llmath/llmatrix3a.cpp b/indra/llmath/llmatrix3a.cpp new file mode 100644 index 0000000000..ab077abcb0 --- /dev/null +++ b/indra/llmath/llmatrix3a.cpp @@ -0,0 +1,134 @@ +/**  + * @file llvector4a.cpp + * @brief SIMD vector implementation + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#include "llmath.h" + +static LL_ALIGN_16(const F32 M_IDENT_3A[12]) =  +												{	1.f, 0.f, 0.f, 0.f, // Column 1 +													0.f, 1.f, 0.f, 0.f, // Column 2 +													0.f, 0.f, 1.f, 0.f }; // Column 3 + +extern const LLMatrix3a LL_M3A_IDENTITY = *reinterpret_cast<const LLMatrix3a*> (M_IDENT_3A); + +void LLMatrix3a::setMul( const LLMatrix3a& lhs, const LLMatrix3a& rhs ) +{ +	const LLVector4a col0 = lhs.getColumn(0); +	const LLVector4a col1 = lhs.getColumn(1); +	const LLVector4a col2 = lhs.getColumn(2); + +	for ( int i = 0; i < 3; i++ ) +	{ +		LLVector4a xxxx = _mm_load_ss( rhs.mColumns[i].getF32ptr() ); +		xxxx.splat<0>( xxxx ); +		xxxx.mul( col0 ); + +		{ +			LLVector4a yyyy = _mm_load_ss( rhs.mColumns[i].getF32ptr() +  1 ); +			yyyy.splat<0>( yyyy ); +			yyyy.mul( col1 );  +			xxxx.add( yyyy ); +		} + +		{ +			LLVector4a zzzz = _mm_load_ss( rhs.mColumns[i].getF32ptr() +  2 ); +			zzzz.splat<0>( zzzz ); +			zzzz.mul( col2 ); +			xxxx.add( zzzz ); +		} + +		xxxx.store4a( mColumns[i].getF32ptr() ); +	} +	 +} + +/*static */void LLMatrix3a::batchTransform( const LLMatrix3a& xform, const LLVector4a* src, int numVectors, LLVector4a* dst ) +{ +	const LLVector4a col0 = xform.getColumn(0); +	const LLVector4a col1 = xform.getColumn(1); +	const LLVector4a col2 = xform.getColumn(2); +	const LLVector4a* maxAddr = src + numVectors; + +	if ( numVectors & 0x1 ) +	{ +		LLVector4a xxxx = _mm_load_ss( (const F32*)src ); +		LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 ); +		LLVector4a zzzz = _mm_load_ss( (const F32*)src + 2 ); +		xxxx.splat<0>( xxxx ); +		yyyy.splat<0>( yyyy ); +		zzzz.splat<0>( zzzz ); +		xxxx.mul( col0 ); +		yyyy.mul( col1 );  +		zzzz.mul( col2 ); +		xxxx.add( yyyy ); +		xxxx.add( zzzz ); +		xxxx.store4a( (F32*)dst ); +		src++; +		dst++; +	} + + +	numVectors >>= 1; +	while ( src < maxAddr ) +	{ +		_mm_prefetch( (const char*)(src + 32 ), _MM_HINT_NTA ); +		_mm_prefetch( (const char*)(dst + 32), _MM_HINT_NTA ); +		LLVector4a xxxx = _mm_load_ss( (const F32*)src ); +		LLVector4a xxxx1= _mm_load_ss( (const F32*)(src + 1) ); + +		xxxx.splat<0>( xxxx ); +		xxxx1.splat<0>( xxxx1 ); +		xxxx.mul( col0 ); +		xxxx1.mul( col0 ); + +		{ +			LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 ); +			LLVector4a yyyy1 = _mm_load_ss( (const F32*)(src + 1) + 1); +			yyyy.splat<0>( yyyy ); +			yyyy1.splat<0>( yyyy1 ); +			yyyy.mul( col1 ); +			yyyy1.mul( col1 ); +			xxxx.add( yyyy ); +			xxxx1.add( yyyy1 ); +		} + +		{ +			LLVector4a zzzz = _mm_load_ss( (const F32*)(src) + 2 ); +			LLVector4a zzzz1 = _mm_load_ss( (const F32*)(++src) + 2 ); +			zzzz.splat<0>( zzzz ); +			zzzz1.splat<0>( zzzz1 ); +			zzzz.mul( col2 ); +			zzzz1.mul( col2 ); +			xxxx.add( zzzz ); +			xxxx1.add( zzzz1 ); +		} + +		xxxx.store4a(dst->getF32ptr()); +		src++; +		dst++; + +		xxxx1.store4a((F32*)dst++); +	} +} diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h new file mode 100644 index 0000000000..adb7e3389d --- /dev/null +++ b/indra/llmath/llmatrix3a.h @@ -0,0 +1,128 @@ +/**  + * @file llmatrix3a.h + * @brief LLMatrix3a class header file - memory aligned and vectorized 3x3 matrix + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + 
* Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef	LL_LLMATRIX3A_H +#define	LL_LLMATRIX3A_H + +///////////////////////////// +// LLMatrix3a, LLRotation +///////////////////////////// +// This class stores a 3x3 (technically 4x3) matrix in column-major order +///////////////////////////// +///////////////////////////// +// These classes are intentionally minimal right now. If you need additional +// functionality, please contact someone with SSE experience (e.g., Falcon or +// Huseby). +///////////////////////////// + +// LLMatrix3a is the base class for LLRotation, which should be used instead any time you're dealing with a  +// rotation matrix. +class LLMatrix3a +{ +public: + +	// Utility function for quickly transforming an array of LLVector4a's +	// For transforming a single LLVector4a, see LLVector4a::setRotated +	static void batchTransform( const LLMatrix3a& xform, const LLVector4a* src, int numVectors, LLVector4a* dst ); + +	// Utility function to obtain the identity matrix +	static inline const LLMatrix3a& getIdentity(); + +	////////////////////////// +	// Ctors +	////////////////////////// +	 +	// Ctor +	LLMatrix3a() {} + +	// Ctor for setting by columns +	inline LLMatrix3a( const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2 ); + +	////////////////////////// +	// Get/Set +	////////////////////////// + +	// Loads from an LLMatrix3 +	inline void loadu(const LLMatrix3& src); +	 +	// Set rows +	inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2); +	 +	// Set columns +	inline void setColumns(const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2); + +	// Get the read-only access to a specified column. Valid columns are 0-2, but the  +	// function is unchecked. You've been warned. +	inline const LLVector4a& getColumn(const U32 column) const; + +	///////////////////////// +	// Matrix modification +	///////////////////////// +	 +	// Set this matrix to the product of lhs and rhs ( this = lhs * rhs ) +	void setMul( const LLMatrix3a& lhs, const LLMatrix3a& rhs ); + +	// Set this matrix to the transpose of src +	inline void setTranspose(const LLMatrix3a& src); + +	// Set this matrix to a*w + b*(1-w) +	inline void setLerp(const LLMatrix3a& a, const LLMatrix3a& b, F32 w); + +	///////////////////////// +	// Matrix inspection +	///////////////////////// + +	// Sets all 4 elements in 'dest' to the determinant of this matrix. +	// If you will be using the determinant in subsequent ops with LLVector4a, use this version +	inline void getDeterminant( LLVector4a& dest ) const; + +	// Returns the determinant as an LLSimdScalar. Use this if you will be using the determinant +	// primary for scalar operations. 
+	inline LLSimdScalar getDeterminant() const; + +	// Returns nonzero if rows 0-2 and colums 0-2 contain no NaN or INF values. Row 3 is ignored +	inline LLBool32 isFinite() const; + +	// Returns true if this matrix is equal to 'rhs' up to 'tolerance' +	inline bool isApproximatelyEqual( const LLMatrix3a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + +protected: + +	LLVector4a mColumns[3]; + +}; + +class LLRotation : public LLMatrix3a +{ +public: +	 +	LLRotation() {} +	 +	// Returns true if this rotation is orthonormal with det ~= 1 +	inline bool isOkRotation() const;		 +}; + +#endif diff --git a/indra/llmath/llmatrix3a.inl b/indra/llmath/llmatrix3a.inl new file mode 100644 index 0000000000..37819fea3c --- /dev/null +++ b/indra/llmath/llmatrix3a.inl @@ -0,0 +1,119 @@ +/**  + * @file llmatrix3a.inl + * @brief LLMatrix3a inline definitions + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#include "llmatrix3a.h" +#include "m3math.h" + +inline LLMatrix3a::LLMatrix3a( const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2 ) +{ +	setColumns( c0, c1, c2 ); +} + +inline void LLMatrix3a::loadu(const LLMatrix3& src) +{ +	mColumns[0].load3(src.mMatrix[0]); +	mColumns[1].load3(src.mMatrix[1]); +	mColumns[2].load3(src.mMatrix[2]); +} + +inline void LLMatrix3a::setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2) +{ +	mColumns[0] = r0; +	mColumns[1] = r1; +	mColumns[2] = r2; +	setTranspose( *this ); +} + +inline void LLMatrix3a::setColumns(const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2) +{ +	mColumns[0] = c0; +	mColumns[1] = c1; +	mColumns[2] = c2; +} + +inline void LLMatrix3a::setTranspose(const LLMatrix3a& src) +{ +	const LLQuad srcCol0 = src.mColumns[0]; +	const LLQuad srcCol1 = src.mColumns[1]; +	const LLQuad unpacklo = _mm_unpacklo_ps( srcCol0, srcCol1 ); +	mColumns[0] = _mm_movelh_ps( unpacklo, src.mColumns[2] ); +	mColumns[1] = _mm_shuffle_ps( _mm_movehl_ps( srcCol0, unpacklo ), src.mColumns[2], _MM_SHUFFLE(0, 1, 1, 0) ); +	mColumns[2] = _mm_shuffle_ps( _mm_unpackhi_ps( srcCol0, srcCol1 ), src.mColumns[2], _MM_SHUFFLE(0, 2, 1, 0) ); +} + +inline const LLVector4a& LLMatrix3a::getColumn(const U32 column) const +{ +	llassert( column < 3 ); +	return mColumns[column]; +} + +inline void LLMatrix3a::setLerp(const LLMatrix3a& a, const LLMatrix3a& b, F32 w) +{ +	mColumns[0].setLerp( a.mColumns[0], b.mColumns[0], w ); +	mColumns[1].setLerp( a.mColumns[1], b.mColumns[1], w ); +	mColumns[2].setLerp( a.mColumns[2], b.mColumns[2], w ); +} + +inline LLBool32 LLMatrix3a::isFinite() const +{ +	return mColumns[0].isFinite3() && mColumns[1].isFinite3() && 
mColumns[2].isFinite3(); +} + +inline void LLMatrix3a::getDeterminant( LLVector4a& dest ) const +{ +	LLVector4a col1xcol2; col1xcol2.setCross3( mColumns[1], mColumns[2] ); +	dest.setAllDot3( col1xcol2, mColumns[0] ); +} + +inline LLSimdScalar LLMatrix3a::getDeterminant() const +{ +	LLVector4a col1xcol2; col1xcol2.setCross3( mColumns[1], mColumns[2] ); +	return col1xcol2.dot3( mColumns[0] ); +} + +inline bool LLMatrix3a::isApproximatelyEqual( const LLMatrix3a& rhs, F32 tolerance /*= F_APPROXIMATELY_ZERO*/ ) const +{ +	return rhs.getColumn(0).equals3(mColumns[0], tolerance)  +		&& rhs.getColumn(1).equals3(mColumns[1], tolerance)  +		&& rhs.getColumn(2).equals3(mColumns[2], tolerance);  +} + +inline const LLMatrix3a& LLMatrix3a::getIdentity() +{ +	extern const LLMatrix3a LL_M3A_IDENTITY; +	return LL_M3A_IDENTITY; +} + +inline bool LLRotation::isOkRotation() const +{ +	LLMatrix3a transpose; transpose.setTranspose( *this ); +	LLMatrix3a product; product.setMul( *this, transpose ); + +	LLSimdScalar detMinusOne = getDeterminant() - 1.f; + +	return product.isApproximatelyEqual( LLMatrix3a::getIdentity() ) && (detMinusOne.getAbs() < F_APPROXIMATELY_ZERO); +} + diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h new file mode 100644 index 0000000000..27cf5b79f6 --- /dev/null +++ b/indra/llmath/llmatrix4a.h @@ -0,0 +1,143 @@ +/**  + * @file llmatrix4a.h + * @brief LLMatrix4a class header file - memory aligned and vectorized 4x4 matrix + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef	LL_LLMATRIX4A_H +#define	LL_LLMATRIX4A_H + +#include "llvector4a.h" +#include "m4math.h" +#include "m3math.h" + +class LLMatrix4a +{ +public: +	LLVector4a mMatrix[4]; + +	inline void clear() +	{ +		mMatrix[0].clear(); +		mMatrix[1].clear(); +		mMatrix[2].clear(); +		mMatrix[3].clear(); +	} + +	inline void loadu(const LLMatrix4& src) +	{ +		mMatrix[0] = _mm_loadu_ps(src.mMatrix[0]); +		mMatrix[1] = _mm_loadu_ps(src.mMatrix[1]); +		mMatrix[2] = _mm_loadu_ps(src.mMatrix[2]); +		mMatrix[3] = _mm_loadu_ps(src.mMatrix[3]); +		 +	} + +	inline void loadu(const LLMatrix3& src) +	{ +		mMatrix[0].load3(src.mMatrix[0]); +		mMatrix[1].load3(src.mMatrix[1]); +		mMatrix[2].load3(src.mMatrix[2]); +		mMatrix[3].set(0,0,0,1.f); +	} + +	inline void add(const LLMatrix4a& rhs) +	{ +		mMatrix[0].add(rhs.mMatrix[0]); +		mMatrix[1].add(rhs.mMatrix[1]); +		mMatrix[2].add(rhs.mMatrix[2]); +		mMatrix[3].add(rhs.mMatrix[3]); +	} + +	inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2) +	{ +		mMatrix[0] = r0; +		mMatrix[1] = r1; +		mMatrix[2] = r2; +	} + +	inline void setMul(const LLMatrix4a& m, const F32 s) +	{ +		mMatrix[0].setMul(m.mMatrix[0], s); +		mMatrix[1].setMul(m.mMatrix[1], s); +		mMatrix[2].setMul(m.mMatrix[2], s); +		mMatrix[3].setMul(m.mMatrix[3], s); +	} + +	inline void setLerp(const LLMatrix4a& a, const LLMatrix4a& b, F32 w) +	{ +		LLVector4a d0,d1,d2,d3; +		d0.setSub(b.mMatrix[0], a.mMatrix[0]); +		d1.setSub(b.mMatrix[1], a.mMatrix[1]); +		d2.setSub(b.mMatrix[2], a.mMatrix[2]); +		d3.setSub(b.mMatrix[3], a.mMatrix[3]); + +		// this = a + d*w +		 +		d0.mul(w); +		d1.mul(w); +		d2.mul(w); +		d3.mul(w); + +		mMatrix[0].setAdd(a.mMatrix[0],d0); +		mMatrix[1].setAdd(a.mMatrix[1],d1); +		mMatrix[2].setAdd(a.mMatrix[2],d2); +		mMatrix[3].setAdd(a.mMatrix[3],d3); +	} + +	inline void rotate(const LLVector4a& v, LLVector4a& res) +	{ +		res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); +		res.mul(mMatrix[0]); +		 +		LLVector4a y; +		y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); +		y.mul(mMatrix[1]); + +		LLVector4a z; +		z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); +		z.mul(mMatrix[2]); + +		res.add(y); +		res.add(z); +	} + +	inline void affineTransform(const LLVector4a& v, LLVector4a& res) +	{ +		LLVector4a x,y,z; + +		x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); +		y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); +		z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); +		 +		x.mul(mMatrix[0]); +		y.mul(mMatrix[1]); +		z.mul(mMatrix[2]); + +		x.add(y); +		z.add(mMatrix[3]); +		res.setAdd(x,z); +	} +}; + +#endif diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 90d4d742c9..1b11e83b4a 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -29,21 +29,20 @@  #include "lltreenode.h"  #include "v3math.h" +#include "llvector4a.h"  #include <vector>  #include <set> -#if LL_RELEASE_WITH_DEBUG_INFO || LL_DEBUG -#define OCT_ERRS LL_ERRS("OctreeErrors") -#else  #define OCT_ERRS LL_WARNS("OctreeErrors") -#endif -#define LL_OCTREE_PARANOIA_CHECK 0 + +extern U32 gOctreeMaxCapacity; +/*#define LL_OCTREE_PARANOIA_CHECK 0  #if LL_DARWIN  #define LL_OCTREE_MAX_CAPACITY 32  #else  #define 
LL_OCTREE_MAX_CAPACITY 128 -#endif +#endif*/  template <class T> class LLOctreeNode; @@ -67,39 +66,56 @@ public:  };  template <class T> +class LLOctreeTravelerDepthFirst : public LLOctreeTraveler<T> +{ +public: +	virtual void traverse(const LLOctreeNode<T>* node); +}; + +template <class T>  class LLOctreeNode : public LLTreeNode<T>  {  public: +  	typedef LLOctreeTraveler<T>									oct_traveler;  	typedef LLTreeTraveler<T>									tree_traveler;  	typedef typename std::set<LLPointer<T> >					element_list; -	typedef typename std::set<LLPointer<T> >::iterator			element_iter; -	typedef typename std::set<LLPointer<T> >::const_iterator	const_element_iter; +	typedef typename element_list::iterator						element_iter; +	typedef typename element_list::const_iterator	const_element_iter;  	typedef typename std::vector<LLTreeListener<T>*>::iterator	tree_listener_iter;  	typedef typename std::vector<LLOctreeNode<T>* >				child_list;  	typedef LLTreeNode<T>		BaseType;  	typedef LLOctreeNode<T>		oct_node;  	typedef LLOctreeListener<T>	oct_listener; -	static const U8 OCTANT_POSITIVE_X = 0x01; -	static const U8 OCTANT_POSITIVE_Y = 0x02; -	static const U8 OCTANT_POSITIVE_Z = 0x04; -		 -	LLOctreeNode(	LLVector3d center,  -					LLVector3d size,  +	/*void* operator new(size_t size) +	{ +		return ll_aligned_malloc_16(size); +	} + +	void operator delete(void* ptr) +	{ +		ll_aligned_free_16(ptr); +	}*/ + +	LLOctreeNode(	const LLVector4a& center,  +					const LLVector4a& size,   					BaseType* parent,   					U8 octant = 255)  	:	mParent((oct_node*)parent),  -		mCenter(center),  -		mSize(size),   		mOctant(octant)   	{  +		mCenter = center; +		mSize = size; +  		updateMinMax();  		if ((mOctant == 255) && mParent)  		{ -			mOctant = ((oct_node*) mParent)->getOctant(mCenter.mdV); +			mOctant = ((oct_node*) mParent)->getOctant(mCenter);  		} +		mElementCount = 0; +  		clearChildren();  	} @@ -114,40 +130,24 @@ public:  	}  	inline const BaseType* getParent()	const			{ return mParent; } -	inline void setParent(BaseType* parent)			{ mParent = (oct_node*) parent; } -	inline const LLVector3d& getCenter() const			{ return mCenter; } -	inline const LLVector3d& getSize() const			{ return mSize; } -	inline void setCenter(LLVector3d center)			{ mCenter = center; } -	inline void setSize(LLVector3d size)				{ mSize = size; } -    inline oct_node* getNodeAt(T* data)				{ return getNodeAt(data->getPositionGroup(), data->getBinRadius()); } -	inline U8 getOctant() const						{ return mOctant; } -	inline void setOctant(U8 octant)					{ mOctant = octant; } +	inline void setParent(BaseType* parent)				{ mParent = (oct_node*) parent; } +	inline const LLVector4a& getCenter() const			{ return mCenter; } +	inline const LLVector4a& getSize() const			{ return mSize; } +	inline void setCenter(const LLVector4a& center)		{ mCenter = center; } +	inline void setSize(const LLVector4a& size)			{ mSize = size; } +    inline oct_node* getNodeAt(T* data)					{ return getNodeAt(data->getPositionGroup(), data->getBinRadius()); } +	inline U8 getOctant() const							{ return mOctant; }  	inline const oct_node*	getOctParent() const		{ return (const oct_node*) getParent(); }  	inline oct_node* getOctParent() 					{ return (oct_node*) getParent(); } -	U8 getOctant(const F64 pos[]) const	//get the octant pos is in +	U8 getOctant(const LLVector4a& pos) const			//get the octant pos is in  	{ -		U8 ret = 0; - -		if (pos[0] > mCenter.mdV[0]) -		{ -			ret |= OCTANT_POSITIVE_X; -		} -		if (pos[1] > mCenter.mdV[1]) -		{ -			ret |= OCTANT_POSITIVE_Y; -		} -		if 
(pos[2] > mCenter.mdV[2]) -		{ -			ret |= OCTANT_POSITIVE_Z; -		} - -		return ret; +		return (U8) (pos.greaterThan(mCenter).getGatheredBits() & 0x7);  	} -	inline bool isInside(const LLVector3d& pos, const F64& rad) const +	inline bool isInside(const LLVector4a& pos, const F32& rad) const  	{ -		return rad <= mSize.mdV[0]*2.0 && isInside(pos);  +		return rad <= mSize[0]*2.f && isInside(pos);   	}  	inline bool isInside(T* data) const			 @@ -155,29 +155,27 @@ public:  		return isInside(data->getPositionGroup(), data->getBinRadius());  	} -	bool isInside(const LLVector3d& pos) const +	bool isInside(const LLVector4a& pos) const  	{ -		const F64& x = pos.mdV[0]; -		const F64& y = pos.mdV[1]; -		const F64& z = pos.mdV[2]; -			 -		if (x > mMax.mdV[0] || x <= mMin.mdV[0] || -			y > mMax.mdV[1] || y <= mMin.mdV[1] || -			z > mMax.mdV[2] || z <= mMin.mdV[2]) +		S32 gt = pos.greaterThan(mMax).getGatheredBits() & 0x7; +		if (gt)  		{  			return false;  		} -		 + +		S32 lt = pos.lessEqual(mMin).getGatheredBits() & 0x7; +		if (lt) +		{ +			return false; +		} +				  		return true;  	}  	void updateMinMax()  	{ -		for (U32 i = 0; i < 3; i++) -		{ -			mMax.mdV[i] = mCenter.mdV[i] + mSize.mdV[i]; -			mMin.mdV[i] = mCenter.mdV[i] - mSize.mdV[i]; -		} +		mMax.setAdd(mCenter, mSize); +		mMin.setSub(mCenter, mSize);  	}  	inline oct_listener* getOctListener(U32 index)  @@ -190,44 +188,44 @@ public:  		return contains(xform->getBinRadius());  	} -	bool contains(F64 radius) +	bool contains(F32 radius)  	{  		if (mParent == NULL)  		{	//root node contains nothing  			return false;  		} -		F64 size = mSize.mdV[0]; -		F64 p_size = size * 2.0; +		F32 size = mSize[0]; +		F32 p_size = size * 2.f; -		return (radius <= 0.001 && size <= 0.001) || +		return (radius <= 0.001f && size <= 0.001f) ||  				(radius <= p_size && radius > size);  	} -	static void pushCenter(LLVector3d &center, const LLVector3d &size, const T* data) +	static void pushCenter(LLVector4a &center, const LLVector4a &size, const T* data)  	{ -		const LLVector3d& pos = data->getPositionGroup(); -		for (U32 i = 0; i < 3; i++) -		{ -			if (pos.mdV[i] > center.mdV[i]) -			{ -				center.mdV[i] += size.mdV[i]; -			} -			else  -			{ -				center.mdV[i] -= size.mdV[i]; -			} -		} +		const LLVector4a& pos = data->getPositionGroup(); + +		LLVector4Logical gt = pos.greaterThan(center); + +		LLVector4a up; +		up = _mm_and_ps(size, gt); + +		LLVector4a down; +		down = _mm_andnot_ps(gt, size); + +		center.add(up); +		center.sub(down);  	}  	void accept(oct_traveler* visitor)				{ visitor->visit(this); }  	virtual bool isLeaf() const						{ return mChild.empty(); } -	U32 getElementCount() const						{ return mData.size(); } +	U32 getElementCount() const						{ return mElementCount; }  	element_list& getData()							{ return mData; }  	const element_list& getData() const				{ return mData; } -	U32 getChildCount()	const						{ return mChild.size(); } +	U32 getChildCount()	const						{ return mChildCount; }  	oct_node* getChild(U32 index)					{ return mChild[index]; }  	const oct_node* getChild(U32 index) const		{ return mChild[index]; }  	child_list& getChildren()						{ return mChild; } @@ -236,32 +234,49 @@ public:  	void accept(tree_traveler* visitor) const		{ visitor->visit(this); }  	void accept(oct_traveler* visitor) const		{ visitor->visit(this); } -	oct_node* getNodeAt(const LLVector3d& pos, const F64& rad) +	void validateChildMap() +	{ +		for (U32 i = 0; i < 8; i++) +		{ +			U8 idx = mChildMap[i]; +			if (idx != 255) +			{ +				LLOctreeNode<T>* child = mChild[idx]; + +		
		if (child->getOctant() != i) +				{ +					llerrs << "Invalid child map, bad octant data." << llendl; +				} + +				if (getOctant(child->getCenter()) != child->getOctant()) +				{ +					llerrs << "Invalid child octant compared to position data." << llendl; +				} +			} +		} +	} + + +	oct_node* getNodeAt(const LLVector4a& pos, const F32& rad)  	{   		LLOctreeNode<T>* node = this;  		if (node->isInside(pos, rad))  		{		  			//do a quick search by octant -			U8 octant = node->getOctant(pos.mdV); -			BOOL keep_going = TRUE; - +			U8 octant = node->getOctant(pos); +			  			//traverse the tree until we find a node that has no node  			//at the appropriate octant or is smaller than the object.    			//by definition, that node is the smallest node that contains   			// the data -			while (keep_going && node->getSize().mdV[0] >= rad) +			U8 next_node = node->mChildMap[octant]; +			 +			while (next_node != 255 && node->getSize()[0] >= rad)  			{	 -				keep_going = FALSE; -				for (U32 i = 0; i < node->getChildCount() && !keep_going; i++) -				{ -					if (node->getChild(i)->getOctant() == octant) -					{ -						node = node->getChild(i); -						octant = node->getOctant(pos.mdV); -						keep_going = TRUE; -					} -				} +				node = node->getChild(next_node); +				octant = node->getOctant(pos); +				next_node = node->mChildMap[octant];  			}  		}  		else if (!node->contains(rad) && node->getParent()) @@ -276,7 +291,7 @@ public:  	{  		if (data == NULL)  		{ -			//OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl; +			OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl;  			return false;  		}  		LLOctreeNode<T>* parent = getOctParent(); @@ -284,22 +299,16 @@ public:  		//is it here?  		if (isInside(data->getPositionGroup()))  		{ -			if (getElementCount() < LL_OCTREE_MAX_CAPACITY && -				(contains(data->getBinRadius()) || -				(data->getBinRadius() > getSize().mdV[0] && -				parent && parent->getElementCount() >= LL_OCTREE_MAX_CAPACITY)))  +			if ((getElementCount() < gOctreeMaxCapacity && contains(data->getBinRadius()) || +				(data->getBinRadius() > getSize()[0] &&	parent && parent->getElementCount() >= gOctreeMaxCapacity)))   			{ //it belongs here -#if LL_OCTREE_PARANOIA_CHECK  				//if this is a redundant insertion, error out (should never happen) -				if (mData.find(data) != mData.end()) -				{ -					llwarns << "Redundant octree insertion detected. 
" << data << llendl; -					return false; -				} -#endif +				llassert(mData.find(data) == mData.end());  				mData.insert(data);  				BaseType::insert(data); + +				mElementCount = mData.size();  				return true;  			}  			else @@ -317,19 +326,26 @@ public:  				}  				//it's here, but no kids are in the right place, make a new kid -				LLVector3d center(getCenter()); -				LLVector3d size(getSize()*0.5); +				LLVector4a center = getCenter(); +				LLVector4a size = getSize(); +				size.mul(0.5f);  				//push center in direction of data  				LLOctreeNode<T>::pushCenter(center, size, data);  				// handle case where floating point number gets too small -				if( llabs(center.mdV[0] - getCenter().mdV[0]) < F_APPROXIMATELY_ZERO && -					llabs(center.mdV[1] - getCenter().mdV[1]) < F_APPROXIMATELY_ZERO && -					llabs(center.mdV[2] - getCenter().mdV[2]) < F_APPROXIMATELY_ZERO) +				LLVector4a val; +				val.setSub(center, getCenter()); +				val.setAbs(val); +								 +				S32 lt = val.lessThan(LLVector4a::getEpsilon()).getGatheredBits() & 0x7; + +				if( lt == 0x7 )  				{  					mData.insert(data);  					BaseType::insert(data); + +					mElementCount = mData.size();  					return true;  				} @@ -344,7 +360,7 @@ public:  				//make sure no existing node matches this position  				for (U32 i = 0; i < getChildCount(); i++)  				{ -					if (mChild[i]->getCenter() == center) +					if (mChild[i]->getCenter().equals3(center))  					{  						OCT_ERRS << "Octree detected duplicate child center and gave up." << llendl;  						return false; @@ -362,7 +378,7 @@ public:  		else   		{  			//it's not in here, give it to the root -			//OCT_ERRS << "Octree insertion failed, starting over from root!" << llendl; +			OCT_ERRS << "Octree insertion failed, starting over from root!" << llendl;  			oct_node* node = this; @@ -383,6 +399,7 @@ public:  		if (mData.find(data) != mData.end())  		{	//we have data  			mData.erase(data); +			mElementCount = mData.size();  			notifyRemoval(data);  			checkAlive();  			return true; @@ -420,6 +437,7 @@ public:          if (mData.find(data) != mData.end())  		{  			mData.erase(data); +			mElementCount = mData.size();  			notifyRemoval(data);  			llwarns << "FOUND!" << llendl;  			checkAlive(); @@ -436,6 +454,9 @@ public:  	void clearChildren()  	{  		mChild.clear(); +		mChildCount = 0; +		U32* foo = (U32*) mChildMap; +		foo[0] = foo[1] = 0xFFFFFFFF;  	}  	void validate() @@ -469,13 +490,19 @@ public:  	void addChild(oct_node* child, BOOL silent = FALSE)   	{  #if LL_OCTREE_PARANOIA_CHECK + +		if (child->getSize().equals3(getSize())) +		{ +			OCT_ERRS << "Child size is same as parent size!" << llendl; +		} +  		for (U32 i = 0; i < getChildCount(); i++)  		{ -			if(mChild[i]->getSize() != child->getSize())  +			if(!mChild[i]->getSize().equals3(child->getSize()))   			{  				OCT_ERRS <<"Invalid octree child size." << llendl;  			} -			if (mChild[i]->getCenter() == child->getCenter()) +			if (mChild[i]->getCenter().equals3(child->getCenter()))  			{  				OCT_ERRS <<"Duplicate octree child position." 
<< llendl;  			} @@ -487,7 +514,10 @@ public:  		}  #endif +		mChildMap[child->getOctant()] = mChildCount; +  		mChild.push_back(child); +		++mChildCount;  		child->setParent(this);  		if (!silent) @@ -500,20 +530,30 @@ public:  		}  	} -	void removeChild(U8 index, BOOL destroy = FALSE) +	void removeChild(S32 index, BOOL destroy = FALSE)  	{  		for (U32 i = 0; i < this->getListenerCount(); i++)  		{  			oct_listener* listener = getOctListener(i);  			listener->handleChildRemoval(this, getChild(index));  		} - +		  		if (destroy)  		{  			mChild[index]->destroy();  			delete mChild[index];  		}  		mChild.erase(mChild.begin() + index); +		--mChildCount; + +		//rebuild child map +		U32* foo = (U32*) mChildMap; +		foo[0] = foo[1] = 0xFFFFFFFF; + +		for (U32 i = 0; i < mChildCount; ++i) +		{ +			mChildMap[mChild[i]->getOctant()] = i; +		}  		checkAlive();  	} @@ -541,19 +581,34 @@ public:  			}  		} -		//OCT_ERRS << "Octree failed to delete requested child." << llendl; +		OCT_ERRS << "Octree failed to delete requested child." << llendl;  	}  protected:	 -	child_list mChild; -	element_list mData; +	typedef enum +	{ +		CENTER = 0, +		SIZE = 1, +		MAX = 2, +		MIN = 3 +	} eDName; + +	LLVector4a mCenter; +	LLVector4a mSize; +	LLVector4a mMax; +	LLVector4a mMin; +	  	oct_node* mParent; -	LLVector3d mCenter; -	LLVector3d mSize; -	LLVector3d mMax; -	LLVector3d mMin;  	U8 mOctant; -}; + +	child_list mChild; +	U8 mChildMap[8]; +	U32 mChildCount; + +	element_list mData; +	U32 mElementCount; +		 +};   //just like a regular node, except it might expand on insert and compress on balance  template <class T> @@ -563,9 +618,9 @@ public:  	typedef LLOctreeNode<T>	BaseType;  	typedef LLOctreeNode<T>		oct_node; -	LLOctreeRoot(	LLVector3d center,  -					LLVector3d size,  -					BaseType* parent) +	LLOctreeRoot(const LLVector4a& center,  +				 const LLVector4a& size,  +				 BaseType* parent)  	:	BaseType(center, size, parent)  	{  	} @@ -596,6 +651,8 @@ public:  			//destroy child  			child->clearChildren();  			delete child; + +			return false;  		}  		return true; @@ -606,28 +663,33 @@ public:  	{  		if (data == NULL)   		{ -			//OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE ROOT !!!" << llendl; +			OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE ROOT !!!" << llendl;  			return false;  		}  		if (data->getBinRadius() > 4096.0)  		{ -			//OCT_ERRS << "!!! ELEMENT EXCEEDS MAXIMUM SIZE IN OCTREE ROOT !!!" << llendl; +			OCT_ERRS << "!!! ELEMENT EXCEEDS MAXIMUM SIZE IN OCTREE ROOT !!!" << llendl;  			return false;  		} -		const F64 MAX_MAG = 1024.0*1024.0; +		LLVector4a MAX_MAG; +		MAX_MAG.splat(1024.f*1024.f); + +		const LLVector4a& v = data->getPositionGroup(); -		const LLVector3d& v = data->getPositionGroup(); -		if (!(fabs(v.mdV[0]-this->mCenter.mdV[0]) < MAX_MAG && -		      fabs(v.mdV[1]-this->mCenter.mdV[1]) < MAX_MAG && -		      fabs(v.mdV[2]-this->mCenter.mdV[2]) < MAX_MAG)) +		LLVector4a val; +		val.setSub(v, BaseType::mCenter); +		val.setAbs(val); +		S32 lt = val.lessThan(MAX_MAG).getGatheredBits() & 0x7; + +		if (lt != 0x7)  		{  			//OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" 
<< llendl;  			return false;  		} -		if (this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup())) +		if (this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup()))  		{  			//we got it, just act like a branch  			oct_node* node = getNodeAt(data); @@ -643,31 +705,34 @@ public:  		else if (this->getChildCount() == 0)  		{  			//first object being added, just wrap it up -			while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) +			while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup())))  			{ -				LLVector3d center, size; +				LLVector4a center, size;  				center = this->getCenter();  				size = this->getSize();  				LLOctreeNode<T>::pushCenter(center, size, data);  				this->setCenter(center); -				this->setSize(size*2); +				size.mul(2.f); +				this->setSize(size);  				this->updateMinMax();  			}  			LLOctreeNode<T>::insert(data);  		}  		else  		{ -			while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) +			while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup())))  			{  				//the data is outside the root node, we need to grow -				LLVector3d center(this->getCenter()); -				LLVector3d size(this->getSize()); +				LLVector4a center(this->getCenter()); +				LLVector4a size(this->getSize());  				//expand this node -				LLVector3d newcenter(center); +				LLVector4a newcenter(center);  				LLOctreeNode<T>::pushCenter(newcenter, size, data);  				this->setCenter(newcenter); -				this->setSize(size*2); +				LLVector4a size2 = size; +				size2.mul(2.f); +				this->setSize(size2);  				this->updateMinMax();  				//copy our children to a new branch @@ -704,4 +769,15 @@ void LLOctreeTraveler<T>::traverse(const LLOctreeNode<T>* node)  		traverse(node->getChild(i));  	}  } + +template <class T> +void LLOctreeTravelerDepthFirst<T>::traverse(const LLOctreeNode<T>* node) +{ +	for (U32 i = 0; i < node->getChildCount(); i++) +	{ +		traverse(node->getChild(i)); +	} +	node->accept(this); +} +  #endif diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index 443f3f46b9..a611894721 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,19 +36,23 @@  // The plane normal = [A, B, C]  // The closest approach = D / sqrt(A*A + B*B + C*C) -class LLPlane : public LLVector4 +class LLPlane  {  public: +	 +	// Constructors  	LLPlane() {}; // no default constructor  	LLPlane(const LLVector3 &p0, F32 d) { setVec(p0, d); }  	LLPlane(const LLVector3 &p0, const LLVector3 &n) { setVec(p0, n); } -	void setVec(const LLVector3 &p0, F32 d) { LLVector4::setVec(p0[0], p0[1], p0[2], d); } -	void setVec(const LLVector3 &p0, const LLVector3 &n) +	inline void setVec(const LLVector3 &p0, F32 d) { mV.set(p0[0], p0[1], p0[2], d); } +	 +	// Set +	inline void setVec(const LLVector3 &p0, const LLVector3 &n)  	{  		F32 d = -(p0 * n);  		setVec(n, d);  	} -	void setVec(const LLVector3 &p0, const LLVector3 &p1, const LLVector3 &p2) +	inline void setVec(const LLVector3 &p0, const LLVector3 &p1, const LLVector3 &p2)  	{  		LLVector3 u, v, w;  		u = p1 - p0; @@ -58,8 +62,38 @@ public:  		F32 d = -(w * p0);  		setVec(w, d);  	} -	LLPlane& operator=(const LLVector4& v2) {  LLVector4::setVec(v2[0],v2[1],v2[2],v2[3]); return *this;} +	 +	inline LLPlane& operator=(const LLVector4& v2) {  mV.set(v2[0],v2[1],v2[2],v2[3]); return *this;} +	 +	inline LLPlane& operator=(const LLVector4a& v2) {  mV.set(v2[0],v2[1],v2[2],v2[3]); return 
*this;}	 +	 +	inline void set(const LLPlane& p2) { mV = p2.mV; } +	 +	//   	F32 dist(const LLVector3 &v2) const { return mV[0]*v2[0] + mV[1]*v2[1] + mV[2]*v2[2] + mV[3]; } +	 +	inline LLSimdScalar dot3(const LLVector4a& b) const { return mV.dot3(b); } +	 +	// Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates +	// the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead	 +	inline F32 operator[](const S32 idx) const { return mV[idx]; } +	 +	// preferable when index is known at compile time +	template <int N> LL_FORCE_INLINE void getAt(LLSimdScalar& v) const { v = mV.getScalarAt<N>(); }  +	 +	// reset the vector to 0, 0, 0, 1 +	inline void clear() { mV.set(0, 0, 0, 1); } +	 +	inline void getVector3(LLVector3& vec) const { vec.set(mV[0], mV[1], mV[2]); } +	 +	// Retrieve the mask indicating which of the x, y, or z axis are greater or equal to zero. +	inline U8 calcPlaneMask()  +	{  +		return mV.greaterEqual(LLVector4a::getZero()).getGatheredBits() & LLVector4Logical::MASK_XYZ; +	} +		 +private: +	LLVector4a mV;  }; diff --git a/indra/llmath/llquantize.h b/indra/llmath/llquantize.h index 7f56ff3448..1595dbecf8 100644 --- a/indra/llmath/llquantize.h +++ b/indra/llmath/llquantize.h @@ -29,10 +29,16 @@  #define LL_LLQUANTIZE_H  const U16 U16MAX = 65535; +LL_ALIGN_16( const F32 F_U16MAX_4A[4] ) = { 65535.f, 65535.f, 65535.f, 65535.f }; +  const F32 OOU16MAX = 1.f/(F32)(U16MAX); +LL_ALIGN_16( const F32 F_OOU16MAX_4A[4] ) = { OOU16MAX, OOU16MAX, OOU16MAX, OOU16MAX };  const U8 U8MAX = 255; +LL_ALIGN_16( const F32 F_U8MAX_4A[4] ) = { 255.f, 255.f, 255.f, 255.f }; +  const F32 OOU8MAX = 1.f/(F32)(U8MAX); +LL_ALIGN_16( const F32 F_OOU8MAX_4A[4] ) = { OOU8MAX, OOU8MAX, OOU8MAX, OOU8MAX };  const U8 FIRSTVALIDCHAR = 54;  const U8 MAXSTRINGVAL = U8MAX - FIRSTVALIDCHAR; //we don't allow newline or null  diff --git a/indra/llmath/llquaternion.cpp b/indra/llmath/llquaternion.cpp index a51f11072c..7381d5eb99 100644 --- a/indra/llmath/llquaternion.cpp +++ b/indra/llmath/llquaternion.cpp @@ -26,9 +26,10 @@  #include "linden_common.h" +#include "llmath.h"	// for F_PI +  #include "llquaternion.h" -#include "llmath.h"	// for F_PI  //#include "vmath.h"  #include "v3math.h"  #include "v3dmath.h" diff --git a/indra/llmath/llquaternion.h b/indra/llmath/llquaternion.h index 26da14ae20..ca0dfe206b 100644 --- a/indra/llmath/llquaternion.h +++ b/indra/llmath/llquaternion.h @@ -27,7 +27,11 @@  #ifndef LLQUATERNION_H  #define LLQUATERNION_H -#include "llmath.h" +#include <iostream> + +#ifndef LLMATH_H //enforce specific include order to avoid tangling inline dependencies +#error "Please include llmath.h first." +#endif  class LLVector4;  class LLVector3; diff --git a/indra/llmath/llquaternion2.h b/indra/llmath/llquaternion2.h new file mode 100644 index 0000000000..fd9c0cf3ab --- /dev/null +++ b/indra/llmath/llquaternion2.h @@ -0,0 +1,105 @@ +/**  + * @file llquaternion2.h + * @brief LLQuaternion2 class header file - SIMD-enabled quaternion class + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. 
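A minimal sketch, not part of this patch, of the octant idiom introduced by the octree changes above: getOctant() gathers the sign bits of a component-wise compare, so the low three bits of getGatheredBits() directly index the child octant. Values below are hypothetical; only calls used elsewhere in this changeset appear.

	LLVector4a center; center.splat(0.f);
	LLVector4a pos;    pos.set(1.f, -2.f, 3.f, 0.f);	// +x, -y, +z relative to center
	U8 octant = (U8) (pos.greaterThan(center).getGatheredBits() & 0x7);	// bit0 (+x) and bit2 (+z) set: octant == 5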
+ *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef	LL_QUATERNION2_H +#define	LL_QUATERNION2_H + +///////////////////////////// +// LLQuaternion2 +///////////////////////////// +// This class stores a quaternion x*i + y*j + z*k + w in <x, y, z, w> order +// (i.e., w in high order element of vector) +///////////////////////////// +///////////////////////////// +// These classes are intentionally minimal right now. If you need additional +// functionality, please contact someone with SSE experience (e.g., Falcon or +// Huseby). +///////////////////////////// +#include "llquaternion.h" + +class LLQuaternion2 +{ +public: + +	////////////////////////// +	// Ctors +	////////////////////////// +	 +	// Ctor +	LLQuaternion2() {} + +	// Ctor from LLQuaternion +	explicit LLQuaternion2( const class LLQuaternion& quat ); + +	////////////////////////// +	// Get/Set +	////////////////////////// + +	// Load from an LLQuaternion +	inline void operator=( const LLQuaternion& quat ) +	{ +		mQ.loadua( quat.mQ ); +	} + +	// Return the internal LLVector4a representation of the quaternion +	inline const LLVector4a& getVector4a() const; +	inline LLVector4a& getVector4aRw(); + +	///////////////////////// +	// Quaternion modification +	///////////////////////// +	 +	// Set this quaternion to the conjugate of src +	inline void setConjugate(const LLQuaternion2& src); + +	// Renormalizes the quaternion. Assumes it has nonzero length. +	inline void normalize(); + +	// Quantize this quaternion to 8 bit precision +	inline void quantize8(); + +	// Quantize this quaternion to 16 bit precision +	inline void quantize16(); + +	///////////////////////// +	// Quaternion inspection +	///////////////////////// + +	// Return true if this quaternion is equal to 'rhs'.  +	// Note! Quaternions exhibit "double-cover", so any rotation has two equally valid +	// quaternion representations and they will NOT compare equal. +	inline bool equals(const LLQuaternion2& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + +	// Return true if all components are finite and the quaternion is normalized +	inline bool isOkRotation() const; + +protected: + +	LLVector4a mQ; + +}; + +#endif diff --git a/indra/llmath/llquaternion2.inl b/indra/llmath/llquaternion2.inl new file mode 100644 index 0000000000..2a6987552d --- /dev/null +++ b/indra/llmath/llquaternion2.inl @@ -0,0 +1,102 @@ +/**  + * @file llquaternion2.inl + * @brief LLQuaternion2 inline definitions + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. 
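As the comment on equals() above warns, every rotation has two quaternion representations, q and -q, and equals() compares raw components, so the pair will not compare equal even though they encode the same rotation. A minimal sketch, not part of this patch; it assumes LLQuaternion's (x, y, z, w) constructor.

	LLQuaternion2 q( LLQuaternion(0.f, 0.f, 0.f, 1.f) );	// identity rotation
	LLQuaternion2 negq( LLQuaternion(0.f, 0.f, 0.f, -1.f) );	// same rotation, opposite sign
	bool same = q.equals(negq);	// false: components differ even though the rotations match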
+ *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#include "llquaternion2.h" + +static const LLQuad LL_V4A_PLUS_ONE = {1.f, 1.f, 1.f, 1.f}; +static const LLQuad LL_V4A_MINUS_ONE = {-1.f, -1.f, -1.f, -1.f}; + +// Ctor from LLQuaternion +inline LLQuaternion2::LLQuaternion2( const LLQuaternion& quat ) +{ +	mQ.set(quat.mQ[VX], quat.mQ[VY], quat.mQ[VZ], quat.mQ[VW]); +} + +////////////////////////// +// Get/Set +////////////////////////// + +// Return the internal LLVector4a representation of the quaternion +inline const LLVector4a& LLQuaternion2::getVector4a() const +{ +	return mQ; +} + +inline LLVector4a& LLQuaternion2::getVector4aRw() +{ +	return mQ; +} + +///////////////////////// +// Quaternion modification +///////////////////////// + +// Set this quaternion to the conjugate of src +inline void LLQuaternion2::setConjugate(const LLQuaternion2& src) +{ +	static LL_ALIGN_16( const U32 F_QUAT_INV_MASK_4A[4] ) = { 0x80000000, 0x80000000, 0x80000000, 0x00000000 }; +	mQ = _mm_xor_ps(src.mQ, *reinterpret_cast<const LLQuad*>(&F_QUAT_INV_MASK_4A));	 +} + +// Renormalizes the quaternion. Assumes it has nonzero length. +inline void LLQuaternion2::normalize() +{ +	mQ.normalize4(); +} + +// Quantize this quaternion to 8 bit precision +inline void LLQuaternion2::quantize8() +{ +	mQ.quantize8( LL_V4A_MINUS_ONE, LL_V4A_PLUS_ONE ); +	normalize(); +} + +// Quantize this quaternion to 16 bit precision +inline void LLQuaternion2::quantize16() +{ +	mQ.quantize16( LL_V4A_MINUS_ONE, LL_V4A_PLUS_ONE ); +	normalize(); +} + + +///////////////////////// +// Quaternion inspection +///////////////////////// + +// Return true if this quaternion is equal to 'rhs'.  +// Note! Quaternions exhibit "double-cover", so any rotation has two equally valid +// quaternion representations and they will NOT compare equal. +inline bool LLQuaternion2::equals(const LLQuaternion2 &rhs, F32 tolerance/* = F_APPROXIMATELY_ZERO*/) const +{ +	return mQ.equals4(rhs.mQ, tolerance); +} + +// Return true if all components are finite and the quaternion is normalized +inline bool LLQuaternion2::isOkRotation() const +{ +	return mQ.isFinite4() && mQ.isNormalized4(); +} + diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h new file mode 100644 index 0000000000..c7cdf7b32c --- /dev/null +++ b/indra/llmath/llsimdmath.h @@ -0,0 +1,93 @@ +/**  + * @file llsimdmath.h + * @brief Common header for SIMD-based math library (llvector4a, llmatrix3a, etc.) + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. 
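setConjugate() above flips the sign bits of x, y and z with a single XOR against a sign mask and leaves w untouched, i.e. (x, y, z, w) becomes (-x, -y, -z, w). A minimal sketch, not part of this patch, with hypothetical component values; it assumes LLQuaternion's (x, y, z, w) constructor.

	LLQuaternion2 src( LLQuaternion(0.1f, 0.2f, 0.3f, 0.9f) );
	LLQuaternion2 conj;
	conj.setConjugate(src);	// conj now holds (-0.1, -0.2, -0.3, 0.9)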
+ *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef	LL_SIMD_MATH_H +#define	LL_SIMD_MATH_H + +#ifndef LLMATH_H +#error "Please include llmath.h before this file." +#endif + +#if ( ( LL_DARWIN || LL_LINUX ) && !(__SSE2__) ) || ( LL_WINDOWS && ( _M_IX86_FP < 2 ) ) +#error SSE2 not enabled. LLVector4a and related class will not compile. +#endif + +#if !LL_WINDOWS +#include <stdint.h> +#endif + +template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address)  +{  +	return reinterpret_cast<T*>( +		(reinterpret_cast<uintptr_t>(address) + 0xF) & ~0xF); +} + +template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)  +{  +	return reinterpret_cast<T*>( +		(reinterpret_cast<uintptr_t>(address) + 0x3F) & ~0x3F); +} + +#if LL_LINUX || LL_DARWIN + +#define			LL_ALIGN_PREFIX(x) +#define			LL_ALIGN_POSTFIX(x)		__attribute__((aligned(x))) + +#elif LL_WINDOWS + +#define			LL_ALIGN_PREFIX(x)		__declspec(align(x)) +#define			LL_ALIGN_POSTFIX(x) + +#else +#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" +#endif + +#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) + + + +#include <xmmintrin.h> +#include <emmintrin.h> + +#include "llsimdtypes.h" +#include "llsimdtypes.inl" + +class LLMatrix3a; +class LLRotation; +class LLMatrix3; + +#include "llquaternion.h" + +#include "llvector4logical.h" +#include "llvector4a.h" +#include "llmatrix3a.h" +#include "llquaternion2.h" +#include "llvector4a.inl" +#include "llmatrix3a.inl" +#include "llquaternion2.inl" + + +#endif //LL_SIMD_MATH_H diff --git a/indra/llmath/llsimdtypes.h b/indra/llmath/llsimdtypes.h new file mode 100644 index 0000000000..bd991d0e71 --- /dev/null +++ b/indra/llmath/llsimdtypes.h @@ -0,0 +1,124 @@ +/**  + * @file llsimdtypes.h + * @brief Declaration of basic SIMD math related types + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef LL_SIMD_TYPES_H +#define LL_SIMD_TYPES_H + +#ifndef LL_SIMD_MATH_H +#error "Please include llmath.h before this file." 
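LL_NEXT_ALIGNED_ADDRESS above rounds a pointer up to the next 16-byte boundary by adding 0xF and masking off the low four bits; LL_NEXT_ALIGNED_ADDRESS_64 does the same with 0x3F for 64-byte lines. A small worked example, not part of this patch, using a hypothetical address.

	char* raw = reinterpret_cast<char*>(0x1001);
	char* aligned = LL_NEXT_ALIGNED_ADDRESS(raw);	// (0x1001 + 0xF) & ~0xF == 0x1010
	// an address that is already 16-byte aligned maps to itself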
+#endif + +typedef __m128	LLQuad; + + +#if LL_WINDOWS +#pragma warning(push) +#pragma warning( disable : 4800 3 ) // Disable warning about casting int to bool for this class. +#if defined(_MSC_VER) && (_MSC_VER < 1500) +// VC++ 2005 is missing these intrinsics +// __forceinline is MSVC specific and attempts to override compiler inlining judgment. This is so +// even in debug builds this call is a NOP. +__forceinline const __m128 _mm_castsi128_ps( const __m128i a ) { return reinterpret_cast<const __m128&>(a); } +__forceinline const __m128i _mm_castps_si128( const __m128 a ) { return reinterpret_cast<const __m128i&>(a); } +#endif // _MSC_VER + +#endif // LL_WINDOWS + +class LLBool32 +{ +public: +	inline LLBool32() {} +	inline LLBool32(int rhs) : m_bool(rhs) {} +	inline LLBool32(unsigned int rhs) : m_bool(rhs) {} +	inline LLBool32(bool rhs) { m_bool = static_cast<const int>(rhs); } +	inline LLBool32& operator= (bool rhs) { m_bool = (int)rhs; return *this; } +	inline bool operator== (bool rhs) const { return static_cast<const bool&>(m_bool) == rhs; } +	inline bool operator!= (bool rhs) const { return !operator==(rhs); } +	inline operator bool() const { return static_cast<const bool&>(m_bool); } + +private: +	int m_bool; +}; + +#if LL_WINDOWS +#pragma warning(pop) +#endif + +class LLSimdScalar +{ +public: +	inline LLSimdScalar() {} +	inline LLSimdScalar(LLQuad q)  +	{  +		mQ = q;  +	} + +	inline LLSimdScalar(F32 f)  +	{  +		mQ = _mm_set_ss(f);  +	} + +	static inline const LLSimdScalar& getZero() +	{ +		extern const LLQuad F_ZERO_4A; +		return reinterpret_cast<const LLSimdScalar&>(F_ZERO_4A); +	} + +	inline F32 getF32() const; + +	inline LLBool32 isApproximatelyEqual(const LLSimdScalar& rhs, F32 tolerance = F_APPROXIMATELY_ZERO) const; + +	inline LLSimdScalar getAbs() const; + +	inline void setMax( const LLSimdScalar& a, const LLSimdScalar& b ); +	 +	inline void setMin( const LLSimdScalar& a, const LLSimdScalar& b ); + +	inline LLSimdScalar& operator=(F32 rhs); + +	inline LLSimdScalar& operator+=(const LLSimdScalar& rhs); + +	inline LLSimdScalar& operator-=(const LLSimdScalar& rhs); + +	inline LLSimdScalar& operator*=(const LLSimdScalar& rhs); + +	inline LLSimdScalar& operator/=(const LLSimdScalar& rhs); + +	inline operator LLQuad() const +	{  +		return mQ;  +	} +	 +	inline const LLQuad& getQuad() const  +	{  +		return mQ;  +	} + +private: +	LLQuad mQ; +}; + +#endif //LL_SIMD_TYPES_H diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl new file mode 100644 index 0000000000..712239e425 --- /dev/null +++ b/indra/llmath/llsimdtypes.inl @@ -0,0 +1,157 @@ +/**  + * @file llsimdtypes.inl + * @brief Inlined definitions of basic SIMD math related types + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
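A minimal usage sketch, not part of this patch, for the LLSimdScalar wrapper declared above; the operators it exercises are defined in llsimdtypes.inl, which follows.

	LLSimdScalar a(2.f);
	LLSimdScalar b(3.f);
	a *= b;	// single-lane SSE multiply: a now holds 6.0
	F32 result = a.getF32();	// copy the low lane back into a plain F32
	llassert( a.isApproximatelyEqual(6.f) );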
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + + + + +////////////////// +// LLSimdScalar +////////////////// + +inline LLSimdScalar operator+(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	LLSimdScalar t(a); +	t += b; +	return t; +} + +inline LLSimdScalar operator-(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	LLSimdScalar t(a); +	t -= b; +	return t; +} + +inline LLSimdScalar operator*(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	LLSimdScalar t(a); +	t *= b; +	return t; +} + +inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	LLSimdScalar t(a); +	t /= b; +	return t; +} + +inline LLSimdScalar operator-(const LLSimdScalar& a) +{ +	static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +	return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a); +} + +inline LLBool32 operator==(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	return _mm_comieq_ss(a, b); +} + +inline LLBool32 operator!=(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	return _mm_comineq_ss(a, b); +} + +inline LLBool32 operator<(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	return _mm_comilt_ss(a, b); +} + +inline LLBool32 operator<=(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	return _mm_comile_ss(a, b); +} + +inline LLBool32 operator>(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	return _mm_comigt_ss(a, b); +} + +inline LLBool32 operator>=(const LLSimdScalar& a, const LLSimdScalar& b) +{ +	return _mm_comige_ss(a, b); +} + +inline LLBool32 LLSimdScalar::isApproximatelyEqual(const LLSimdScalar& rhs, F32 tolerance /* = F_APPROXIMATELY_ZERO */) const +{ +	const LLSimdScalar tol( tolerance ); +	const LLSimdScalar diff = _mm_sub_ss( mQ, rhs.mQ ); +	const LLSimdScalar absDiff = diff.getAbs(); +	return absDiff <= tol; +} + +inline void LLSimdScalar::setMax( const LLSimdScalar& a, const LLSimdScalar& b ) +{ +	mQ = _mm_max_ss( a, b ); +} + +inline void LLSimdScalar::setMin( const LLSimdScalar& a, const LLSimdScalar& b ) +{ +	mQ = _mm_min_ss( a, b ); +} + +inline LLSimdScalar& LLSimdScalar::operator=(F32 rhs)  +{  +	mQ = _mm_set_ss(rhs);  +	return *this;  +} + +inline LLSimdScalar& LLSimdScalar::operator+=(const LLSimdScalar& rhs)  +{ +	mQ = _mm_add_ss( mQ, rhs ); +	return *this; +} + +inline LLSimdScalar& LLSimdScalar::operator-=(const LLSimdScalar& rhs) +{ +	mQ = _mm_sub_ss( mQ, rhs ); +	return *this; +} + +inline LLSimdScalar& LLSimdScalar::operator*=(const LLSimdScalar& rhs) +{ +	mQ = _mm_mul_ss( mQ, rhs ); +	return *this; +} + +inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) +{ +	mQ = _mm_div_ss( mQ, rhs ); +	return *this; +} + +inline LLSimdScalar LLSimdScalar::getAbs() const +{ +	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; +	return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); +} + +inline F32 LLSimdScalar::getF32() const +{  +	F32 ret;  +	_mm_store_ss(&ret, mQ);  +	return ret;  +} diff --git a/indra/llmath/lltreenode.h b/indra/llmath/lltreenode.h index a462d1659e..c66bc26176 100644 --- a/indra/llmath/lltreenode.h +++ b/indra/llmath/lltreenode.h @@ -28,6 +28,9 @@  #include "stdtypes.h"  #include "xform.h" +#include "llpointer.h" 
+#include "llrefcount.h" +  #include <vector>  template <class T> class LLTreeNode; diff --git a/indra/llmath/llv4math.h b/indra/llmath/llv4math.h deleted file mode 100644 index 5f403ba526..0000000000 --- a/indra/llmath/llv4math.h +++ /dev/null @@ -1,141 +0,0 @@ -/**  - * @file llv4math.h - * @brief LLV4* class header file - vector processor enabled math - * - * $LicenseInfo:firstyear=2007&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - *  - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - *  - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU - * Lesser General Public License for more details. - *  - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA - *  - * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA - * $/LicenseInfo$ - */ - -#ifndef	LL_LLV4MATH_H -#define	LL_LLV4MATH_H - -// *NOTE: We do not support SSE acceleration on Windows builds. -// Our minimum specification for the viewer includes 1 GHz Athlon processors, -// which covers the Athlon Thunderbird series that does not support SSE. -// -// Our header files include statements like this -//   const F32 HAVOK_TIMESTEP = 1.f / 45.f; -// This creates "globals" that are included in each .obj file.  If a single -// .cpp file has SSE code generation turned on (eg, llviewerjointmesh_sse.cpp) -// these globals will be initialized using SSE instructions.  This causes SL -// to crash before main() on processors without SSE.  Untangling all these  -// headers/variables is too much work for the small performance gains of  -// vectorization. -// -// Therefore we only support vectorization on builds where the everything is  -// built with SSE or Altivec.  See https://jira.secondlife.com/browse/VWR-1610 -// and https://jira.lindenlab.com/browse/SL-47720 for details. -// -// Sorry the code is such a mess. JC - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4MATH - GNUC -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -#if LL_GNUC && __GNUC__ >= 4 && __SSE__ - -#define			LL_VECTORIZE					1 - -#if LL_DARWIN - -#include <Accelerate/Accelerate.h> -#include <xmmintrin.h> -typedef vFloat	V4F32; - -#else - -#include <xmmintrin.h> -typedef float	V4F32							__attribute__((vector_size(16))); - -#endif - -#endif -#if LL_GNUC - -#define			LL_LLV4MATH_ALIGN_PREFIX -#define			LL_LLV4MATH_ALIGN_POSTFIX		__attribute__((aligned(16))) - -#endif - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4MATH - MSVC -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -// Only vectorize if the entire Windows build uses SSE. 
-// _M_IX86_FP is set when SSE code generation is turned on, and I have -// confirmed this in VS2003, VS2003 SP1, and VS2005. JC -#if LL_MSVC && _M_IX86_FP - -#define			LL_VECTORIZE					1 - -#include <xmmintrin.h> - -typedef __m128	V4F32; - -#endif -#if LL_MSVC - -#define			LL_LLV4MATH_ALIGN_PREFIX		__declspec(align(16)) -#define			LL_LLV4MATH_ALIGN_POSTFIX - -#endif - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4MATH - default - no vectorization -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -#if !LL_VECTORIZE - -#define			LL_VECTORIZE					0 - -struct			V4F32							{ F32 __pad__[4]; }; - -inline F32 llv4lerp(F32 a, F32 b, F32 w)		{ return ( b - a ) * w + a; } - -#endif - -#ifndef			LL_LLV4MATH_ALIGN_PREFIX -#	define			LL_LLV4MATH_ALIGN_PREFIX -#endif -#ifndef			LL_LLV4MATH_ALIGN_POSTFIX -#	define			LL_LLV4MATH_ALIGN_POSTFIX -#endif - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4MATH -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - - -#define			LLV4_NUM_AXIS					4 - -class LLV4Vector3; -class LLV4Matrix3; -class LLV4Matrix4; - -#endif diff --git a/indra/llmath/llv4matrix3.h b/indra/llmath/llv4matrix3.h deleted file mode 100644 index 270f5d7dae..0000000000 --- a/indra/llmath/llv4matrix3.h +++ /dev/null @@ -1,220 +0,0 @@ -/**  - * @file llviewerjointmesh.cpp - * @brief LLV4* class header file - vector processor enabled math - * - * $LicenseInfo:firstyear=2007&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - *  - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - *  - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU - * Lesser General Public License for more details. 
- *  - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA - *  - * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA - * $/LicenseInfo$ - */ - -#ifndef LL_LLV4MATRIX3_H -#define LL_LLV4MATRIX3_H - -#include "llv4math.h" -#include "llv4vector3.h" -#include "m3math.h"			// for operator LLMatrix3() - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Matrix3 -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -LL_LLV4MATH_ALIGN_PREFIX - -class LLV4Matrix3 -{ -public: -	union { -		F32		mMatrix[LLV4_NUM_AXIS][LLV4_NUM_AXIS]; -		V4F32	mV[LLV4_NUM_AXIS]; -	}; - -	void				lerp(const LLV4Matrix3 &a, const LLV4Matrix3 &b, const F32 &w); -	void				multiply(const LLVector3 &a, LLVector3& out) const; -	void				multiply(const LLVector4 &a, LLV4Vector3& out) const; -	void				multiply(const LLVector3 &a, LLV4Vector3& out) const; - -	const LLV4Matrix3&	transpose(); -	const LLV4Matrix3&	operator=(const LLMatrix3& a); - -	operator			LLMatrix3()	const { return (reinterpret_cast<const LLMatrix4*>(const_cast<const F32*>(&mMatrix[0][0])))->getMat3(); } - -	friend LLVector3	operator*(const LLVector3& a, const LLV4Matrix3& b); -} - -LL_LLV4MATH_ALIGN_POSTFIX; - - - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Matrix3 - SSE -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -#if LL_VECTORIZE - -inline void LLV4Matrix3::lerp(const LLV4Matrix3 &a, const LLV4Matrix3 &b, const F32 &w) -{ -	__m128 vw = _mm_set1_ps(w); -	mV[VX] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VX], a.mV[VX]), vw), a.mV[VX]); // ( b - a ) * w + a -	mV[VY] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VY], a.mV[VY]), vw), a.mV[VY]); -	mV[VZ] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VZ], a.mV[VZ]), vw), a.mV[VZ]); -} - -inline void LLV4Matrix3::multiply(const LLVector3 &a, LLVector3& o) const -{ -	LLV4Vector3 j; -	j.v = 				 	_mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX]); // ( ax * vx ) + ... -	j.v = _mm_add_ps(j.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); -	j.v = _mm_add_ps(j.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); -	o.setVec(j.mV); -} - -inline void LLV4Matrix3::multiply(const LLVector4 &a, LLV4Vector3& o) const -{ -	o.v =					_mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX]); // ( ax * vx ) + ... -	o.v = _mm_add_ps(o.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); -	o.v = _mm_add_ps(o.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); -} - -inline void LLV4Matrix3::multiply(const LLVector3 &a, LLV4Vector3& o) const -{ -	o.v =					_mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX]); // ( ax * vx ) + ... 
-	o.v = _mm_add_ps(o.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); -	o.v = _mm_add_ps(o.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); -} - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Matrix3 -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -#else - -inline void LLV4Matrix3::lerp(const LLV4Matrix3 &a, const LLV4Matrix3 &b, const F32 &w) -{ -	mMatrix[VX][VX] = llv4lerp(a.mMatrix[VX][VX], b.mMatrix[VX][VX], w); -	mMatrix[VX][VY] = llv4lerp(a.mMatrix[VX][VY], b.mMatrix[VX][VY], w); -	mMatrix[VX][VZ] = llv4lerp(a.mMatrix[VX][VZ], b.mMatrix[VX][VZ], w); - -	mMatrix[VY][VX] = llv4lerp(a.mMatrix[VY][VX], b.mMatrix[VY][VX], w); -	mMatrix[VY][VY] = llv4lerp(a.mMatrix[VY][VY], b.mMatrix[VY][VY], w); -	mMatrix[VY][VZ] = llv4lerp(a.mMatrix[VY][VZ], b.mMatrix[VY][VZ], w); - -	mMatrix[VZ][VX] = llv4lerp(a.mMatrix[VZ][VX], b.mMatrix[VZ][VX], w); -	mMatrix[VZ][VY] = llv4lerp(a.mMatrix[VZ][VY], b.mMatrix[VZ][VY], w); -	mMatrix[VZ][VZ] = llv4lerp(a.mMatrix[VZ][VZ], b.mMatrix[VZ][VZ], w); -} - -inline void LLV4Matrix3::multiply(const LLVector3 &a, LLVector3& o) const -{ -	o.setVec(		a.mV[VX] * mMatrix[VX][VX] +  -					a.mV[VY] * mMatrix[VY][VX] +  -					a.mV[VZ] * mMatrix[VZ][VX], -					  -					a.mV[VX] * mMatrix[VX][VY] +  -					a.mV[VY] * mMatrix[VY][VY] +  -					a.mV[VZ] * mMatrix[VZ][VY], -					  -					a.mV[VX] * mMatrix[VX][VZ] +  -					a.mV[VY] * mMatrix[VY][VZ] +  -					a.mV[VZ] * mMatrix[VZ][VZ]); -} - -inline void LLV4Matrix3::multiply(const LLVector4 &a, LLV4Vector3& o) const -{ -	o.setVec(		a.mV[VX] * mMatrix[VX][VX] +  -					a.mV[VY] * mMatrix[VY][VX] +  -					a.mV[VZ] * mMatrix[VZ][VX], -					  -					a.mV[VX] * mMatrix[VX][VY] +  -					a.mV[VY] * mMatrix[VY][VY] +  -					a.mV[VZ] * mMatrix[VZ][VY], -					  -					a.mV[VX] * mMatrix[VX][VZ] +  -					a.mV[VY] * mMatrix[VY][VZ] +  -					a.mV[VZ] * mMatrix[VZ][VZ]); -} - -inline void LLV4Matrix3::multiply(const LLVector3 &a, LLV4Vector3& o) const -{ -	o.setVec(		a.mV[VX] * mMatrix[VX][VX] +  -					a.mV[VY] * mMatrix[VY][VX] +  -					a.mV[VZ] * mMatrix[VZ][VX], -					  -					a.mV[VX] * mMatrix[VX][VY] +  -					a.mV[VY] * mMatrix[VY][VY] +  -					a.mV[VZ] * mMatrix[VZ][VY], -					  -					a.mV[VX] * mMatrix[VX][VZ] +  -					a.mV[VY] * mMatrix[VY][VZ] +  -					a.mV[VZ] * mMatrix[VZ][VZ]); -} - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Matrix3 -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -#endif - -inline const LLV4Matrix3&	LLV4Matrix3::transpose() -{ -#if LL_VECTORIZE && defined(_MM_TRANSPOSE4_PS) -	_MM_TRANSPOSE4_PS(mV[VX], mV[VY], mV[VZ], mV[VW]); -	return *this; -#else -	F32 temp; -	temp = mMatrix[VX][VY]; mMatrix[VX][VY] = mMatrix[VY][VX]; mMatrix[VY][VX] = temp; -	temp = mMatrix[VX][VZ]; mMatrix[VX][VZ] = mMatrix[VZ][VX]; mMatrix[VZ][VX] = temp; -	temp = mMatrix[VY][VZ]; mMatrix[VY][VZ] = mMatrix[VZ][VY]; mMatrix[VZ][VY] = temp; -#endif -	return *this; -} - -inline const LLV4Matrix3& LLV4Matrix3::operator=(const LLMatrix3& a) -{ -	memcpy(mMatrix[VX], a.mMatrix[VX], sizeof(F32) * 3 ); -	memcpy(mMatrix[VY], a.mMatrix[VY], sizeof(F32) * 3 ); -	memcpy(mMatrix[VZ], 
a.mMatrix[VZ], sizeof(F32) * 3 ); -	return *this; -} - -inline LLVector3 operator*(const LLVector3& a, const LLV4Matrix3& b) -{ -	return LLVector3( -				a.mV[VX] * b.mMatrix[VX][VX] +  -				a.mV[VY] * b.mMatrix[VY][VX] +  -				a.mV[VZ] * b.mMatrix[VZ][VX], -	 -				a.mV[VX] * b.mMatrix[VX][VY] +  -				a.mV[VY] * b.mMatrix[VY][VY] +  -				a.mV[VZ] * b.mMatrix[VZ][VY], -	 -				a.mV[VX] * b.mMatrix[VX][VZ] +  -				a.mV[VY] * b.mMatrix[VY][VZ] +  -				a.mV[VZ] * b.mMatrix[VZ][VZ] ); -} - -#endif diff --git a/indra/llmath/llv4matrix4.h b/indra/llmath/llv4matrix4.h deleted file mode 100644 index 2eb49d9294..0000000000 --- a/indra/llmath/llv4matrix4.h +++ /dev/null @@ -1,249 +0,0 @@ -/**  - * @file llviewerjointmesh.cpp - * @brief LLV4* class header file - vector processor enabled math - * - * $LicenseInfo:firstyear=2007&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - *  - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - *  - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU - * Lesser General Public License for more details. - *  - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA - *  - * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA - * $/LicenseInfo$ - */ - -#ifndef LL_LLV4MATRIX4_H -#define LL_LLV4MATRIX4_H - -#include "llv4math.h" -#include "llv4matrix3.h"	// just for operator LLV4Matrix3() -#include "llv4vector3.h" - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Matrix4 -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -LL_LLV4MATH_ALIGN_PREFIX - -class LLV4Matrix4 -{ -public: -	union { -		F32		mMatrix[LLV4_NUM_AXIS][LLV4_NUM_AXIS]; -		V4F32	mV[LLV4_NUM_AXIS]; -	}; - -	void				lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w); -	void				multiply(const LLVector3 &a, LLVector3& o) const; -	void				multiply(const LLVector3 &a, LLV4Vector3& o) const; - -	const LLV4Matrix4&	transpose(); -	const LLV4Matrix4&  translate(const LLVector3 &vec); -	const LLV4Matrix4&  translate(const LLV4Vector3 &vec); -	const LLV4Matrix4&	operator=(const LLMatrix4& a); - -	operator			LLMatrix4()	const { return *(reinterpret_cast<const LLMatrix4*>(const_cast<const F32*>(&mMatrix[0][0]))); } -	operator			LLV4Matrix3()	const { return *(reinterpret_cast<const LLV4Matrix3*>(const_cast<const F32*>(&mMatrix[0][0]))); } -	 -	friend LLVector3	operator*(const LLVector3 &a, const LLV4Matrix4 &b); -} - -LL_LLV4MATH_ALIGN_POSTFIX; - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Matrix4 - SSE -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -#if LL_VECTORIZE - -inline void 
LLV4Matrix4::lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w) -{ -	__m128 vw = _mm_set1_ps(w); -	mV[VX] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VX], a.mV[VX]), vw), a.mV[VX]); // ( b - a ) * w + a -	mV[VY] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VY], a.mV[VY]), vw), a.mV[VY]); -	mV[VZ] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VZ], a.mV[VZ]), vw), a.mV[VZ]); -	mV[VW] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VW], a.mV[VW]), vw), a.mV[VW]); -} - -inline void LLV4Matrix4::multiply(const LLVector3 &a, LLVector3& o) const -{ -	LLV4Vector3 j; -	j.v = _mm_add_ps(mV[VW], _mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX])); // ( ax * vx ) + vw -	j.v = _mm_add_ps(j.v   , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); -	j.v = _mm_add_ps(j.v   , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); -	o.setVec(j.mV); -} - -inline void LLV4Matrix4::multiply(const LLVector3 &a, LLV4Vector3& o) const -{ -	o.v = _mm_add_ps(mV[VW], _mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX])); // ( ax * vx ) + vw -	o.v = _mm_add_ps(o.v   , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); -	o.v = _mm_add_ps(o.v   , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); -} - -inline const LLV4Matrix4& LLV4Matrix4::translate(const LLV4Vector3 &vec) -{ -	mV[VW] = _mm_add_ps(mV[VW], vec.v); -	return (*this); -} - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Matrix4 -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -#else - -inline void LLV4Matrix4::lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w) -{ -	mMatrix[VX][VX] = llv4lerp(a.mMatrix[VX][VX], b.mMatrix[VX][VX], w); -	mMatrix[VX][VY] = llv4lerp(a.mMatrix[VX][VY], b.mMatrix[VX][VY], w); -	mMatrix[VX][VZ] = llv4lerp(a.mMatrix[VX][VZ], b.mMatrix[VX][VZ], w); - -	mMatrix[VY][VX] = llv4lerp(a.mMatrix[VY][VX], b.mMatrix[VY][VX], w); -	mMatrix[VY][VY] = llv4lerp(a.mMatrix[VY][VY], b.mMatrix[VY][VY], w); -	mMatrix[VY][VZ] = llv4lerp(a.mMatrix[VY][VZ], b.mMatrix[VY][VZ], w); - -	mMatrix[VZ][VX] = llv4lerp(a.mMatrix[VZ][VX], b.mMatrix[VZ][VX], w); -	mMatrix[VZ][VY] = llv4lerp(a.mMatrix[VZ][VY], b.mMatrix[VZ][VY], w); -	mMatrix[VZ][VZ] = llv4lerp(a.mMatrix[VZ][VZ], b.mMatrix[VZ][VZ], w); - -	mMatrix[VW][VX] = llv4lerp(a.mMatrix[VW][VX], b.mMatrix[VW][VX], w); -	mMatrix[VW][VY] = llv4lerp(a.mMatrix[VW][VY], b.mMatrix[VW][VY], w); -	mMatrix[VW][VZ] = llv4lerp(a.mMatrix[VW][VZ], b.mMatrix[VW][VZ], w); -} - -inline void LLV4Matrix4::multiply(const LLVector3 &a, LLVector3& o) const -{ -	o.setVec(		a.mV[VX] * mMatrix[VX][VX] +  -					a.mV[VY] * mMatrix[VY][VX] +  -					a.mV[VZ] * mMatrix[VZ][VX] + -					mMatrix[VW][VX], -					  -					a.mV[VX] * mMatrix[VX][VY] +  -					a.mV[VY] * mMatrix[VY][VY] +  -					a.mV[VZ] * mMatrix[VZ][VY] + -					mMatrix[VW][VY], -					  -					a.mV[VX] * mMatrix[VX][VZ] +  -					a.mV[VY] * mMatrix[VY][VZ] +  -					a.mV[VZ] * mMatrix[VZ][VZ] + -					mMatrix[VW][VZ]); -} - -inline void LLV4Matrix4::multiply(const LLVector3 &a, LLV4Vector3& o) const -{ -	o.setVec(		a.mV[VX] * mMatrix[VX][VX] +  -					a.mV[VY] * mMatrix[VY][VX] +  -					a.mV[VZ] * mMatrix[VZ][VX] + -					mMatrix[VW][VX], -					  -					a.mV[VX] * mMatrix[VX][VY] +  -					a.mV[VY] * mMatrix[VY][VY] +  -					a.mV[VZ] * mMatrix[VZ][VY] + -					mMatrix[VW][VY], -					  -					a.mV[VX] * mMatrix[VX][VZ] +  -					a.mV[VY] * mMatrix[VY][VZ] +  -					a.mV[VZ] * mMatrix[VZ][VZ] + -					
mMatrix[VW][VZ]); -} - -inline const LLV4Matrix4& LLV4Matrix4::translate(const LLV4Vector3 &vec) -{ -	mMatrix[3][0] += vec.mV[0]; -	mMatrix[3][1] += vec.mV[1]; -	mMatrix[3][2] += vec.mV[2]; -	return (*this); -} - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Matrix4 -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -#endif - -inline const LLV4Matrix4& LLV4Matrix4::operator=(const LLMatrix4& a) -{ -	memcpy(mMatrix, a.mMatrix, sizeof(F32) * 16 ); -	return *this; -} - -inline const LLV4Matrix4& LLV4Matrix4::transpose() -{ -#if LL_VECTORIZE && defined(_MM_TRANSPOSE4_PS) -	_MM_TRANSPOSE4_PS(mV[VX], mV[VY], mV[VZ], mV[VW]); -#else -	LLV4Matrix4 mat; -	mat.mMatrix[0][0] = mMatrix[0][0]; -	mat.mMatrix[1][0] = mMatrix[0][1]; -	mat.mMatrix[2][0] = mMatrix[0][2]; -	mat.mMatrix[3][0] = mMatrix[0][3]; - -	mat.mMatrix[0][1] = mMatrix[1][0]; -	mat.mMatrix[1][1] = mMatrix[1][1]; -	mat.mMatrix[2][1] = mMatrix[1][2]; -	mat.mMatrix[3][1] = mMatrix[1][3]; - -	mat.mMatrix[0][2] = mMatrix[2][0]; -	mat.mMatrix[1][2] = mMatrix[2][1]; -	mat.mMatrix[2][2] = mMatrix[2][2]; -	mat.mMatrix[3][2] = mMatrix[2][3]; - -	mat.mMatrix[0][3] = mMatrix[3][0]; -	mat.mMatrix[1][3] = mMatrix[3][1]; -	mat.mMatrix[2][3] = mMatrix[3][2]; -	mat.mMatrix[3][3] = mMatrix[3][3]; - -	*this = mat; -#endif -	return *this; -} - -inline const LLV4Matrix4& LLV4Matrix4::translate(const LLVector3 &vec) -{ -	mMatrix[3][0] += vec.mV[0]; -	mMatrix[3][1] += vec.mV[1]; -	mMatrix[3][2] += vec.mV[2]; -	return (*this); -} - -inline LLVector3 operator*(const LLVector3 &a, const LLV4Matrix4 &b) -{ -	return LLVector3(a.mV[VX] * b.mMatrix[VX][VX] +  -					 a.mV[VY] * b.mMatrix[VY][VX] +  -					 a.mV[VZ] * b.mMatrix[VZ][VX] + -					 b.mMatrix[VW][VX], -					  -					 a.mV[VX] * b.mMatrix[VX][VY] +  -					 a.mV[VY] * b.mMatrix[VY][VY] +  -					 a.mV[VZ] * b.mMatrix[VZ][VY] + -					 b.mMatrix[VW][VY], -					  -					 a.mV[VX] * b.mMatrix[VX][VZ] +  -					 a.mV[VY] * b.mMatrix[VY][VZ] +  -					 a.mV[VZ] * b.mMatrix[VZ][VZ] + -					 b.mMatrix[VW][VZ]); -} - - -#endif diff --git a/indra/llmath/llv4vector3.h b/indra/llmath/llv4vector3.h deleted file mode 100644 index a340d53f5a..0000000000 --- a/indra/llmath/llv4vector3.h +++ /dev/null @@ -1,80 +0,0 @@ -/**  - * @file llviewerjointmesh.cpp - * @brief LLV4* class header file - vector processor enabled math - * - * $LicenseInfo:firstyear=2007&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - *  - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - *  - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU - * Lesser General Public License for more details. 
- *  - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA - *  - * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA - * $/LicenseInfo$ - */ - -#ifndef LL_LLV4VECTOR3_H -#define LL_LLV4VECTOR3_H - -#include "llv4math.h" - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Vector3 -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -LL_LLV4MATH_ALIGN_PREFIX - -class LLV4Vector3 -{ -public: -	union { -		F32		mV[LLV4_NUM_AXIS]; -		V4F32	v; -	}; - -	enum { -		ALIGNMENT = 16 -		}; - -	void				setVec(F32 x, F32 y, F32 z); -	void				setVec(F32 a); -} - -LL_LLV4MATH_ALIGN_POSTFIX; - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLV4Vector3 -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -inline void	LLV4Vector3::setVec(F32 x, F32 y, F32 z) -{ -	mV[VX] = x; -	mV[VY] = y; -	mV[VZ] = z; -} - -inline void	LLV4Vector3::setVec(F32 a) -{ -#if LL_VECTORIZE -	v = _mm_set1_ps(a); -#else -	setVec(a, a, a); -#endif -} - -#endif diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp new file mode 100644 index 0000000000..b66b7a7076 --- /dev/null +++ b/indra/llmath/llvector4a.cpp @@ -0,0 +1,222 @@ +/**  + * @file llvector4a.cpp + * @brief SIMD vector implementation + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#include "llmath.h" +#include "llquantize.h" + +extern const LLQuad F_ZERO_4A		= { 0, 0, 0, 0 }; +extern const LLQuad F_APPROXIMATELY_ZERO_4A = {  +	F_APPROXIMATELY_ZERO, +	F_APPROXIMATELY_ZERO, +	F_APPROXIMATELY_ZERO, +	F_APPROXIMATELY_ZERO +}; + +extern const LLVector4a LL_V4A_ZERO = reinterpret_cast<const LLVector4a&> ( F_ZERO_4A ); +extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F_APPROXIMATELY_ZERO_4A ); + +/*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) +{ +	assert(src != NULL); +	assert(dst != NULL); +	assert(bytes > 0); +	assert((bytes % sizeof(F32))== 0);  +	 +	F32* end = dst + (bytes / sizeof(F32) ); + +	if (bytes > 64) +	{ +		F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); +		 +		//at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies +		F32* end_64 = end-16; +		 +		_mm_prefetch((char*)begin_64, _MM_HINT_NTA); +		_mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); +		_mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); +		_mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); +		 +		while (dst < begin_64) +		{ +			copy4a(dst, src); +			dst += 4; +			src += 4; +		} +		 +		while (dst < end_64) +		{ +			_mm_prefetch((char*)src + 512, _MM_HINT_NTA); +			_mm_prefetch((char*)dst + 512, _MM_HINT_NTA); +			copy4a(dst, src); +			copy4a(dst+4, src+4); +			copy4a(dst+8, src+8); +			copy4a(dst+12, src+12); +			 +			dst += 16; +			src += 16; +		} +	} + +	while (dst < end) +	{ +		copy4a(dst, src); +		dst += 4; +		src += 4; +	} +} + +void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) +{ +	const LLVector4a col0 = rot.getColumn(0); +	const LLVector4a col1 = rot.getColumn(1); +	const LLVector4a col2 = rot.getColumn(2); + +	LLVector4a result = _mm_load_ss( vec.getF32ptr() ); +	result.splat<0>( result ); +	result.mul( col0 ); + +	{ +		LLVector4a yyyy = _mm_load_ss( vec.getF32ptr() +  1 ); +		yyyy.splat<0>( yyyy ); +		yyyy.mul( col1 );  +		result.add( yyyy ); +	} + +	{ +		LLVector4a zzzz = _mm_load_ss( vec.getF32ptr() +  2 ); +		zzzz.splat<0>( zzzz ); +		zzzz.mul( col2 ); +		result.add( zzzz ); +	} + +	*this = result; +} + +void LLVector4a::setRotated( const LLQuaternion2& quat, const LLVector4a& vec ) +{ +	const LLVector4a& quatVec = quat.getVector4a(); +	LLVector4a temp; temp.setCross3(quatVec, vec); +	temp.add( temp ); +	 +	const LLVector4a realPart( quatVec.getScalarAt<3>() ); +	LLVector4a tempTimesReal; tempTimesReal.setMul( temp, realPart ); + +	mQ = vec; +	add( tempTimesReal ); +	 +	LLVector4a imagCrossTemp; imagCrossTemp.setCross3( quatVec, temp ); +	add(imagCrossTemp); +} + +void LLVector4a::quantize8( const LLVector4a& low, const LLVector4a& high ) +{ +	LLVector4a val(mQ); +	LLVector4a delta; delta.setSub( high, low ); + +	{ +		val.clamp(low, high); +		val.sub(low); + +		// 8-bit quantization means we can do with just 12 bits of reciprocal accuracy +		const LLVector4a oneOverDelta = _mm_rcp_ps(delta.mQ); +// 		{ +// 			static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; +// 			LLVector4a two; two.load4a( F_TWO_4A ); +//  +// 			// Here we use _mm_rcp_ps plus one round of newton-raphson +// 			// We 
wish to find 'x' such that x = 1/delta +// 			// As a first approximation, we take x0 = _mm_rcp_ps(delta) +// 			// Then x1 = 2 * x0 - a * x0^2 or x1 = x0 * ( 2 - a * x0 ) +// 			// See Intel AP-803 http://ompf.org/!/Intel_application_note_AP-803.pdf +// 			const LLVector4a recipApprox = _mm_rcp_ps(delta.mQ); +// 			oneOverDelta.setMul( delta, recipApprox ); +// 			oneOverDelta.setSub( two, oneOverDelta ); +// 			oneOverDelta.mul( recipApprox ); +// 		} + +		val.mul(oneOverDelta); +		val.mul(*reinterpret_cast<const LLVector4a*>(F_U8MAX_4A)); +	} + +	val = _mm_cvtepi32_ps(_mm_cvtps_epi32( val.mQ )); + +	{ +		val.mul(*reinterpret_cast<const LLVector4a*>(F_OOU8MAX_4A)); +		val.mul(delta); +		val.add(low); +	} + +	{ +		LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU8MAX_4A)); +		LLVector4a absVal; absVal.setAbs( val ); +		setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val ); +	}	 +} + +void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) +{ +	LLVector4a val(mQ); +	LLVector4a delta; delta.setSub( high, low ); + +	{ +		val.clamp(low, high); +		val.sub(low); + +		// 16-bit quantization means we need a round of Newton-Raphson +		LLVector4a oneOverDelta; +		{ +			static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; +			LLVector4a two; two.load4a( F_TWO_4A ); + +			// Here we use _mm_rcp_ps plus one round of newton-raphson +			// We wish to find 'x' such that x = 1/delta +			// As a first approximation, we take x0 = _mm_rcp_ps(delta) +			// Then x1 = 2 * x0 - a * x0^2 or x1 = x0 * ( 2 - a * x0 ) +			// See Intel AP-803 http://ompf.org/!/Intel_application_note_AP-803.pdf +			const LLVector4a recipApprox = _mm_rcp_ps(delta.mQ); +			oneOverDelta.setMul( delta, recipApprox ); +			oneOverDelta.setSub( two, oneOverDelta ); +			oneOverDelta.mul( recipApprox ); +		} + +		val.mul(oneOverDelta); +		val.mul(*reinterpret_cast<const LLVector4a*>(F_U16MAX_4A)); +	} + +	val = _mm_cvtepi32_ps(_mm_cvtps_epi32( val.mQ )); + +	{ +		val.mul(*reinterpret_cast<const LLVector4a*>(F_OOU16MAX_4A)); +		val.mul(delta); +		val.add(low); +	} + +	{ +		LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU16MAX_4A)); +		LLVector4a absVal; absVal.setAbs( val ); +		setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val ); +	}	 +} diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h new file mode 100644 index 0000000000..596082509d --- /dev/null +++ b/indra/llmath/llvector4a.h @@ -0,0 +1,324 @@ +/**  + * @file llvector4a.h + * @brief LLVector4a class header file - memory aligned and vectorized 4 component vector + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef	LL_LLVECTOR4A_H +#define	LL_LLVECTOR4A_H + + +class LLRotation; + +#include <assert.h> +#include "llpreprocessor.h" + +/////////////////////////////////// +// FIRST TIME USERS PLEASE READ +////////////////////////////////// +// This is just the beginning of LLVector4a. There are many more useful functions +// yet to be implemented. For example, setNeg to negate a vector, rotate() to apply +// a matrix rotation, various functions to manipulate only the X, Y, and Z elements +// and many others (including a whole variety of accessors). So if you don't see a  +// function here that you need, please contact Falcon or someone else with SSE  +// experience (Richard, I think, has some and davep has a little as of the time  +// of this writing, July 08, 2010) about getting it implemented before you resort to +// LLVector3/LLVector4.  +///////////////////////////////// + +class LLVector4a +{ +public: + +	/////////////////////////////////// +	// STATIC METHODS +	/////////////////////////////////// +	 +	// Call initClass() at startup to avoid 15,000+ cycle penalties from denormalized numbers +	static void initClass() +	{ +		_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); +		_MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); +	} + +	// Return a vector of all zeros +	static inline const LLVector4a& getZero() +	{ +		extern const LLVector4a LL_V4A_ZERO; +		return LL_V4A_ZERO; +	} +	 +	// Return a vector of all epsilon, where epsilon is a small float suitable for approximate equality checks +	static inline const LLVector4a& getEpsilon() +	{ +		extern const LLVector4a LL_V4A_EPSILON; +		return LL_V4A_EPSILON; +	} + +	// Copy 16 bytes from src to dst. Source and destination must be 16-byte aligned +	static inline void copy4a(F32* dst, const F32* src) +	{ +		_mm_store_ps(dst, _mm_load_ps(src)); +	} + +	// Copy words 16-byte blocks from src to dst. Source and destination must not overlap.  +	static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); + +	//////////////////////////////////// +	// CONSTRUCTORS  +	//////////////////////////////////// +	 +	LLVector4a() +	{ //DO NOT INITIALIZE -- The overhead is completely unnecessary +	} +	 +	LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) +	{ +		set(x,y,z,w); +	} +	 +	LLVector4a(F32 x) +	{ +		splat(x); +	} +	 +	LLVector4a(const LLSimdScalar& x) +	{ +		splat(x); +	} + +	LLVector4a(LLQuad q) +	{ +		mQ = q; +	} + +	//////////////////////////////////// +	// LOAD/STORE +	//////////////////////////////////// +	 +	// Load from 16-byte aligned src array (preferred method of loading) +	inline void load4a(const F32* src); +	 +	// Load from unaligned src array (NB: Significantly slower than load4a) +	inline void loadua(const F32* src); +	 +	// Load only three floats beginning at address 'src'. Slowest method. +	inline void load3(const F32* src); +	 +	// Store to a 16-byte aligned memory address +	inline void store4a(F32* dst) const; +	 +	//////////////////////////////////// +	// BASIC GET/SET  +	//////////////////////////////////// +	 +	// Return a "this" as an F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon) +	inline F32* getF32ptr(); +	 +	// Return a "this" as a const F32 pointer. 
Do not use unless you have a very good reason.  (Not sure? Ask Falcon) +	inline const F32* const getF32ptr() const; +	 +	// Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates +	// the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead +	inline F32 operator[](const S32 idx) const; + +	// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. +	inline LLSimdScalar getScalarAt(const S32 idx) const; + +	// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. +	template <int N> LL_FORCE_INLINE LLSimdScalar getScalarAt() const; + +	// Set to an x, y, z and optional w provided +	inline void set(F32 x, F32 y, F32 z, F32 w = 0.f); +	 +	// Set to all zeros. This is preferred to using ::getZero() +	inline void clear(); +	 +	// Set all elements to 'x' +	inline void splat(const F32 x); + +	// Set all elements to 'x' +	inline void splat(const LLSimdScalar& x); +	 +	// Set all 4 elements to element N of src, with N known at compile time +	template <int N> void splat(const LLVector4a& src); +	 +	// Set all 4 elements to element i of v, with i NOT known at compile time +	inline void splat(const LLVector4a& v, U32 i); +	 +	// Select bits from sourceIfTrue and sourceIfFalse according to bits in mask +	inline void setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse ); +	 +	//////////////////////////////////// +	// ALGEBRAIC +	//////////////////////////////////// +	 +	// Set this to the element-wise (a + b) +	inline void setAdd(const LLVector4a& a, const LLVector4a& b); +	 +	// Set this to element-wise (a - b) +	inline void setSub(const LLVector4a& a, const LLVector4a& b); +	 +	// Set this to element-wise multiply (a * b) +	inline void setMul(const LLVector4a& a, const LLVector4a& b); +	 +	// Set this to element-wise quotient (a / b) +	inline void setDiv(const LLVector4a& a, const LLVector4a& b); +	 +	// Set this to the element-wise absolute value of src +	inline void setAbs(const LLVector4a& src); +	 +	// Add to each component in this vector the corresponding component in rhs +	inline void add(const LLVector4a& rhs); +	 +	// Subtract from each component in this vector the corresponding component in rhs +	inline void sub(const LLVector4a& rhs); +	 +	// Multiply each component in this vector by the corresponding component in rhs +	inline void mul(const LLVector4a& rhs); +	 +	// Divide each component in this vector by the corresponding component in rhs +	inline void div(const LLVector4a& rhs); +	 +	// Multiply this vector by x in a scalar fashion +	inline void mul(const F32 x); + +	// Set this to (a x b) (geometric cross-product) +	inline void setCross3(const LLVector4a& a, const LLVector4a& b); +	 +	// Set all elements to the dot product of the x, y, and z elements in a and b +	inline void setAllDot3(const LLVector4a& a, const LLVector4a& b); + +	// Set all elements to the dot product of the x, y, z, and w elements in a and b +	inline void setAllDot4(const LLVector4a& a, const LLVector4a& b); + +	// Return the 3D dot product of this vector and b +	inline LLSimdScalar dot3(const LLVector4a& b) const; + +	// Return the 4D dot product of this vector and b +	inline LLSimdScalar dot4(const LLVector4a& b) const; + +	// Normalize this vector with respect to the x, y, and z components only. 
Accurate to 22 bites of precision. W component is destroyed +	// Note that this does not consider zero length vectors! +	inline void normalize3(); + +	// Same as normalize3() but with respect to all 4 components +	inline void normalize4(); + +	// Same as normalize3(), but returns length as a SIMD scalar +	inline LLSimdScalar normalize3withLength(); + +	// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed +	// Note that this does not consider zero length vectors! +	inline void normalize3fast(); + +	// Return true if this vector is normalized with respect to x,y,z up to tolerance +	inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const; + +	// Return true if this vector is normalized with respect to all components up to tolerance +	inline LLBool32 isNormalized4( F32 tolerance = 1e-3 ) const; + +	// Set all elements to the length of vector 'v'  +	inline void setAllLength3( const LLVector4a& v ); + +	// Get this vector's length +	inline LLSimdScalar getLength3() const; +	 +	// Set the components of this vector to the minimum of the corresponding components of lhs and rhs +	inline void setMin(const LLVector4a& lhs, const LLVector4a& rhs); +	 +	// Set the components of this vector to the maximum of the corresponding components of lhs and rhs +	inline void setMax(const LLVector4a& lhs, const LLVector4a& rhs); +	 +	// Clamps this vector to be within the component-wise range low to high (inclusive) +	inline void clamp( const LLVector4a& low, const LLVector4a& high ); + +	// Set this to  (c * lhs) + rhs * ( 1 - c) +	inline void setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c); +	 +	// Return true (nonzero) if x, y, z (and w for Finite4) are all finite floats +	inline LLBool32 isFinite3() const;	 +	inline LLBool32 isFinite4() const; + +	// Set this vector to 'vec' rotated by the LLRotation or LLQuaternion2 provided +	void setRotated( const LLRotation& rot, const LLVector4a& vec ); +	void setRotated( const class LLQuaternion2& quat, const LLVector4a& vec ); + +	// Set this vector to 'vec' rotated by the INVERSE of the LLRotation or LLQuaternion2 provided +	inline void setRotatedInv( const LLRotation& rot, const LLVector4a& vec ); +	inline void setRotatedInv( const class LLQuaternion2& quat, const LLVector4a& vec ); + +	// Quantize this vector to 8 or 16 bit precision +	void quantize8( const LLVector4a& low, const LLVector4a& high ); +	void quantize16( const LLVector4a& low, const LLVector4a& high ); + +	//////////////////////////////////// +	// LOGICAL +	////////////////////////////////////	 +	// The functions in this section will compare the elements in this vector +	// to those in rhs and return an LLVector4Logical with all bits set in elements +	// where the comparison was true and all bits unset in elements where the comparison +	// was false. See llvector4logica.h +	//////////////////////////////////// +	// WARNING: Other than equals3 and equals4, these functions do NOT account +	// for floating point tolerance. You should include the appropriate tolerance +	// in the inputs. 
+	//////////////////////////////////// +	 +	inline LLVector4Logical greaterThan(const LLVector4a& rhs) const; + +	inline LLVector4Logical lessThan(const LLVector4a& rhs) const; +	 +	inline LLVector4Logical greaterEqual(const LLVector4a& rhs) const; + +	inline LLVector4Logical lessEqual(const LLVector4a& rhs) const; +	 +	inline LLVector4Logical equal(const LLVector4a& rhs) const; + +	// Returns true if this and rhs are componentwise equal up to the specified absolute tolerance +	inline bool equals4(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + +	inline bool equals3(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + +	//////////////////////////////////// +	// OPERATORS +	////////////////////////////////////	 +	 +	// Do NOT add aditional operators without consulting someone with SSE experience +	inline const LLVector4a& operator= ( const LLVector4a& rhs ); +	 +	inline const LLVector4a& operator= ( const LLQuad& rhs ); + +	inline operator LLQuad() const;	 + +private: +	LLQuad mQ; +}; + +inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) +{ +	min.setMin(min, p); +	max.setMax(max, p); +} + +#endif diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl new file mode 100644 index 0000000000..7ad22a5631 --- /dev/null +++ b/indra/llmath/llvector4a.inl @@ -0,0 +1,593 @@ +/**  + * @file llvector4a.inl + * @brief LLVector4a inline function implementations + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +//////////////////////////////////// +// LOAD/STORE +//////////////////////////////////// + +// Load from 16-byte aligned src array (preferred method of loading) +inline void LLVector4a::load4a(const F32* src) +{ +	mQ = _mm_load_ps(src); +} + +// Load from unaligned src array (NB: Significantly slower than load4a) +inline void LLVector4a::loadua(const F32* src) +{ +	mQ = _mm_loadu_ps(src); +} + +// Load only three floats beginning at address 'src'. Slowest method. +inline void LLVector4a::load3(const F32* src) +{ +	// mQ = { 0.f, src[2], src[1], src[0] } = { W, Z, Y, X } +	// NB: This differs from the convention of { Z, Y, X, W } +	mQ = _mm_set_ps(0.f, src[2], src[1], src[0]); +}	 + +// Store to a 16-byte aligned memory address +inline void LLVector4a::store4a(F32* dst) const +{ +	_mm_store_ps(dst, mQ); +} + +//////////////////////////////////// +// BASIC GET/SET  +//////////////////////////////////// + +// Return a "this" as an F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon) +F32* LLVector4a::getF32ptr() +{ +	return (F32*) &mQ; +} + +// Return a "this" as a const F32 pointer. 
Do not use unless you have a very good reason.  (Not sure? Ask Falcon) +const F32* const LLVector4a::getF32ptr() const +{ +	return (const F32* const) &mQ; +} + +// Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates +// the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead +inline F32 LLVector4a::operator[](const S32 idx) const +{ +	return ((F32*)&mQ)[idx]; +}	 + +// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. +inline LLSimdScalar LLVector4a::getScalarAt(const S32 idx) const +{ +	// Return appropriate LLQuad. It will be cast to LLSimdScalar automatically (should be effectively a nop) +	switch (idx) +	{ +		case 0: +			return mQ; +		case 1: +			return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(1, 1, 1, 1)); +		case 2: +			return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(2, 2, 2, 2)); +		case 3: +		default: +			return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(3, 3, 3, 3)); +	} +} + +// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. +template <int N> LL_FORCE_INLINE LLSimdScalar LLVector4a::getScalarAt() const +{ +	return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(N, N, N, N)); +} + +template<> LL_FORCE_INLINE LLSimdScalar LLVector4a::getScalarAt<0>() const +{ +	return mQ; +} + +// Set to an x, y, z and optional w provided +inline void LLVector4a::set(F32 x, F32 y, F32 z, F32 w) +{ +	mQ = _mm_set_ps(w, z, y, x); +} + +// Set to all zeros +inline void LLVector4a::clear() +{ +	mQ = LLVector4a::getZero().mQ; +} + +inline void LLVector4a::splat(const F32 x) +{ +	mQ = _mm_set1_ps(x);	 +} + +inline void LLVector4a::splat(const LLSimdScalar& x) +{ +	mQ = _mm_shuffle_ps( x.getQuad(), x.getQuad(), _MM_SHUFFLE(0,0,0,0) ); +} + +// Set all 4 elements to element N of src, with N known at compile time +template <int N> void LLVector4a::splat(const LLVector4a& src) +{ +	mQ = _mm_shuffle_ps(src.mQ, src.mQ, _MM_SHUFFLE(N, N, N, N) ); +} + +// Set all 4 elements to element i of v, with i NOT known at compile time +inline void LLVector4a::splat(const LLVector4a& v, U32 i) +{ +	switch (i) +	{ +		case 0: +			mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(0, 0, 0, 0)); +			break; +		case 1: +			mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(1, 1, 1, 1)); +			break; +		case 2: +			mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(2, 2, 2, 2)); +			break; +		case 3: +			mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(3, 3, 3, 3)); +			break; +	} +} + +// Select bits from sourceIfTrue and sourceIfFalse according to bits in mask +inline void LLVector4a::setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse ) +{ +	// ((( sourceIfTrue ^ sourceIfFalse ) & mask) ^ sourceIfFalse ) +	// E.g., sourceIfFalse = 1010b, sourceIfTrue = 0101b, mask = 1100b +	// (sourceIfTrue ^ sourceIfFalse) = 1111b --> & mask = 1100b --> ^ sourceIfFalse = 0110b,  +	// as expected (01 from sourceIfTrue, 10 from sourceIfFalse) +	// Courtesy of Mark++, http://markplusplus.wordpress.com/2007/03/14/fast-sse-select-operation/ +	mQ = _mm_xor_ps( sourceIfFalse, _mm_and_ps( mask, _mm_xor_ps( sourceIfTrue, sourceIfFalse ) ) ); +} + +//////////////////////////////////// +// ALGEBRAIC +//////////////////////////////////// + +// Set this to the element-wise (a + b) +inline void LLVector4a::setAdd(const LLVector4a& a, const LLVector4a& b) +{ +	mQ = _mm_add_ps(a.mQ, 
b.mQ); +} + +// Set this to element-wise (a - b) +inline void LLVector4a::setSub(const LLVector4a& a, const LLVector4a& b) +{ +	mQ = _mm_sub_ps(a.mQ, b.mQ); +} + +// Set this to element-wise multiply (a * b) +inline void LLVector4a::setMul(const LLVector4a& a, const LLVector4a& b) +{ +	mQ = _mm_mul_ps(a.mQ, b.mQ); +} + +// Set this to element-wise quotient (a / b) +inline void LLVector4a::setDiv(const LLVector4a& a, const LLVector4a& b) +{ +	mQ = _mm_div_ps( a.mQ, b.mQ ); +} + +// Set this to the element-wise absolute value of src +inline void LLVector4a::setAbs(const LLVector4a& src) +{ +	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; +	mQ = _mm_and_ps(src.mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); +} + +// Add to each component in this vector the corresponding component in rhs +inline void LLVector4a::add(const LLVector4a& rhs) +{ +	mQ = _mm_add_ps(mQ, rhs.mQ);	 +} + +// Subtract from each component in this vector the corresponding component in rhs +inline void LLVector4a::sub(const LLVector4a& rhs) +{ +	mQ = _mm_sub_ps(mQ, rhs.mQ); +} + +// Multiply each component in this vector by the corresponding component in rhs +inline void LLVector4a::mul(const LLVector4a& rhs) +{ +	mQ = _mm_mul_ps(mQ, rhs.mQ);	 +} + +// Divide each component in this vector by the corresponding component in rhs +inline void LLVector4a::div(const LLVector4a& rhs) +{ +	// TODO: Check accuracy, maybe add divFast +	mQ = _mm_div_ps(mQ, rhs.mQ); +} + +// Multiply this vector by x in a scalar fashion +inline void LLVector4a::mul(const F32 x)  +{ +	LLVector4a t; +	t.splat(x); +	 +	mQ = _mm_mul_ps(mQ, t.mQ); +} + +// Set this to (a x b) (geometric cross-product) +inline void LLVector4a::setCross3(const LLVector4a& a, const LLVector4a& b) +{ +	// Vectors are stored in memory in w, z, y, x order from high to low +	// Set vector1 = { a[W], a[X], a[Z], a[Y] } +	const LLQuad vector1 = _mm_shuffle_ps( a.mQ, a.mQ, _MM_SHUFFLE( 3, 0, 2, 1 )); +	// Set vector2 = { b[W], b[Y], b[X], b[Z] } +	const LLQuad vector2 = _mm_shuffle_ps( b.mQ, b.mQ, _MM_SHUFFLE( 3, 1, 0, 2 )); +	// mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] } +	mQ = _mm_mul_ps( vector1, vector2 ); +	// vector3 = { a[W], a[Y], a[X], a[Z] } +	const LLQuad vector3 = _mm_shuffle_ps( a.mQ, a.mQ, _MM_SHUFFLE( 3, 1, 0, 2 )); +	// vector4 = { b[W], b[X], b[Z], b[Y] } +	const LLQuad vector4 = _mm_shuffle_ps( b.mQ, b.mQ, _MM_SHUFFLE( 3, 0, 2, 1 )); +	// mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } +	mQ = _mm_sub_ps( mQ, _mm_mul_ps( vector3, vector4 )); +} + +/* This function works, but may be slightly slower than the one below on older machines + inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b) + { + // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } + const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ ); + // yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] } + const LLQuad wzxy = _mm_shuffle_ps( ab, ab, _MM_SHUFFLE(3, 2, 0, 1 )); + // xPlusY = { 2*a[W]*b[W], 2 * a[Z] * b[Z], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } + const LLQuad xPlusY = _mm_add_ps(ab, wzxy); + // xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] }  + const LLQuad xPlusYSplat = _mm_movelh_ps(xPlusY, xPlusY); + // zSplat = { a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z] } + const LLQuad zSplat = _mm_shuffle_ps( ab, ab, _MM_SHUFFLE( 2, 2, 2, 2 )); + // mQ = { a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, 
same, same } + mQ = _mm_add_ps(zSplat, xPlusYSplat); + }*/ + +// Set all elements to the dot product of the x, y, and z elements in a and b +inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b) +{ +	// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } +	const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ ); +	// yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] } +	const __m128i wzxy = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE(3, 2, 0, 1 )); +	// xPlusY = { 2*a[W]*b[W], 2 * a[Z] * b[Z], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } +	const LLQuad xPlusY = _mm_add_ps(ab, _mm_castsi128_ps(wzxy)); +	// xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] }  +	const LLQuad xPlusYSplat = _mm_movelh_ps(xPlusY, xPlusY); +	// zSplat = { a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z] } +	const __m128i zSplat = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE( 2, 2, 2, 2 )); +	// mQ = { a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same } +	mQ = _mm_add_ps(_mm_castsi128_ps(zSplat), xPlusYSplat); +} + +// Set all elements to the dot product of the x, y, z, and w elements in a and b +inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b) +{ +	// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } +	const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ ); +	// yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] } +	const __m128i zwxy = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE(2, 3, 0, 1 )); +	// zPlusWandXplusY = { a[W]*b[W] + a[Z]*b[Z], a[Z] * b[Z] + a[W]*b[W], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } +	const LLQuad zPlusWandXplusY = _mm_add_ps(ab, _mm_castsi128_ps(zwxy)); +	// xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] }  +	const LLQuad xPlusYSplat = _mm_movelh_ps(zPlusWandXplusY, zPlusWandXplusY); +	const LLQuad zPlusWSplat = _mm_movehl_ps(zPlusWandXplusY, zPlusWandXplusY); + +	// mQ = { a[W]*b[W] + a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same } +	mQ = _mm_add_ps(xPlusYSplat, zPlusWSplat); +} + +// Return the 3D dot product of this vector and b +inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const +{ +	const LLQuad ab = _mm_mul_ps( mQ, b.mQ ); +	const LLQuad splatY = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(1, 1, 1, 1) ) ); +	const LLQuad splatZ = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(2, 2, 2, 2) ) ); +	const LLQuad xPlusY = _mm_add_ps( ab, splatY ); +	return _mm_add_ps( xPlusY, splatZ );	 +} + +// Return the 4D dot product of this vector and b +inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const +{ +	// ab = { w, z, y, x } + 	const LLQuad ab = _mm_mul_ps( mQ, b.mQ ); + 	// upperProdsInLowerElems = { y, x, y, x } +	const LLQuad upperProdsInLowerElems = _mm_movehl_ps( ab, ab ); +	// sumOfPairs = { w+y, z+x, 2y, 2x } + 	const LLQuad sumOfPairs = _mm_add_ps( upperProdsInLowerElems, ab ); +	// shuffled = { z+x, z+x, z+x, z+x } +	const LLQuad shuffled = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( sumOfPairs ), _MM_SHUFFLE(1, 1, 1, 1) ) ); +	return _mm_add_ss( sumOfPairs, shuffled ); +} + +// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed +// Note that this does not consider zero length vectors! 
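For orientation, a minimal usage sketch of the cross- and dot-product helpers implemented above. The function name and the axis values are invented for the example; reading the result back through getF32() relies on the LLSimdScalar accessor used elsewhere in this patch.

    // Illustrative only: exercise setCross3()/dot3() with unit axis vectors.
    inline F32 example_cross_dot()
    {
        LLVector4a x_axis(1.f, 0.f, 0.f);  // w defaults to 0
        LLVector4a y_axis(0.f, 1.f, 0.f);

        LLVector4a z_axis;
        z_axis.setCross3(x_axis, y_axis);  // expect (0, 0, 1, 0)

        // dot3() ignores the w lane; orthogonal axes give 0
        return x_axis.dot3(y_axis).getF32();
    }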
+inline void LLVector4a::normalize3() +{ +	// lenSqrd = a dot a +	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); +	// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } +	const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +	static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; +	static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +	// Now we do one round of Newton-Raphson approximation to get full accuracy +	// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) +	// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) +	// w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3 +	// = 0.5 * w * (3 - a*w^2) +	// Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula +	// which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)] +	const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt ); +	const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt ); +	const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt ); +	const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt)); +	mQ = _mm_mul_ps( mQ, nrApprox ); +} + +// Normalize this vector with respect to all components. Accurate to 22 bites of precision. +// Note that this does not consider zero length vectors! +inline void LLVector4a::normalize4() +{ +	// lenSqrd = a dot a +	LLVector4a lenSqrd; lenSqrd.setAllDot4( *this, *this ); +	// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } +	const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +	static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; +	static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +	// Now we do one round of Newton-Raphson approximation to get full accuracy +	// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) +	// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) +	// w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3 +	// = 0.5 * w * (3 - a*w^2) +	// Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula +	// which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)] +	const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt ); +	const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt ); +	const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt ); +	const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt)); +	mQ = _mm_mul_ps( mQ, nrApprox ); +} + +// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed +// Note that this does not consider zero length vectors! 
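normalize3() and normalize4() above (and normalize3withLength() just below) refine the approximate _mm_rsqrt_ps result with one Newton-Raphson step, exactly as the inline comments derive. A scalar sketch of that refinement, written as a hypothetical helper that is not part of this patch:

    // Scalar view of the Newton-Raphson step used above: given an initial
    // estimate r0 ~ 1/sqrt(a), one iteration gives r1 = 0.5 * r0 * (3 - a*r0*r0),
    // lifting the 10-12 bit _mm_rsqrt_ps estimate to the ~22 bits quoted above.
    static inline F32 refined_rsqrt(F32 a, F32 r0)
    {
        return 0.5f * r0 * (3.f - a * r0 * r0);
    }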
+inline LLSimdScalar LLVector4a::normalize3withLength() +{ +	// lenSqrd = a dot a +	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); +	// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } +	const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +	static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; +	static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +	// Now we do one round of Newton-Raphson approximation to get full accuracy +	// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) +	// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) +	// w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3 +	// = 0.5 * w * (3 - a*w^2) +	// Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula +	// which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)] +	const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt ); +	const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt ); +	const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt ); +	const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt)); +	mQ = _mm_mul_ps( mQ, nrApprox ); +	return _mm_sqrt_ss(lenSqrd); +} + +// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed +// Note that this does not consider zero length vectors! +inline void LLVector4a::normalize3fast() +{ +	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); +	const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +	mQ = _mm_mul_ps( mQ, approxRsqrt ); +} + +// Return true if this vector is normalized with respect to x,y,z up to tolerance +inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const +{ +	static LL_ALIGN_16(const U32 ones[4]) = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; +	LLSimdScalar tol = _mm_load_ss( &tolerance ); +	tol = _mm_mul_ss( tol, tol ); +	LLVector4a lenSquared; lenSquared.setAllDot3( *this, *this ); +	lenSquared.sub( *reinterpret_cast<const LLVector4a*>(ones) ); +	lenSquared.setAbs(lenSquared); +	return _mm_comile_ss( lenSquared, tol );		 +} + +// Return true if this vector is normalized with respect to all components up to tolerance +inline LLBool32 LLVector4a::isNormalized4( F32 tolerance ) const +{ +	static LL_ALIGN_16(const U32 ones[4]) = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; +	LLSimdScalar tol = _mm_load_ss( &tolerance ); +	tol = _mm_mul_ss( tol, tol ); +	LLVector4a lenSquared; lenSquared.setAllDot4( *this, *this ); +	lenSquared.sub( *reinterpret_cast<const LLVector4a*>(ones) ); +	lenSquared.setAbs(lenSquared); +	return _mm_comile_ss( lenSquared, tol );		 +} + +// Set all elements to the length of vector 'v'  +inline void LLVector4a::setAllLength3( const LLVector4a& v ) +{ +	LLVector4a lenSqrd; +	lenSqrd.setAllDot3(v, v); +	 +	mQ = _mm_sqrt_ps(lenSqrd.mQ); +} + +// Get this vector's length +inline LLSimdScalar LLVector4a::getLength3() const +{ +	return _mm_sqrt_ss( dot3( (const LLVector4a)mQ ) ); +} + +// Set the components of this vector to the minimum of the corresponding components of lhs and rhs +inline void LLVector4a::setMin(const LLVector4a& lhs, const LLVector4a& rhs) +{ +	mQ = _mm_min_ps(lhs.mQ, rhs.mQ); +} + +// Set the components of this vector to the maximum of the corresponding components 
of lhs and rhs +inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs) +{ +	mQ = _mm_max_ps(lhs.mQ, rhs.mQ); +} + +// Set this to  (c * lhs) + rhs * ( 1 - c) +inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c) +{ +	LLVector4a a = lhs; +	a.mul(c); +	 +	LLVector4a b = rhs; +	b.mul(1.f-c); +	 +	setAdd(a, b); +} + +inline LLBool32 LLVector4a::isFinite3() const +{ +	static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; +	const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask); +	const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); +	const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); +	return !equalityCheck.areAnySet( LLVector4Logical::MASK_XYZ ); +} +	 +inline LLBool32 LLVector4a::isFinite4() const +{ +	static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; +	const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask); +	const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); +	const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); +	return !equalityCheck.areAnySet( LLVector4Logical::MASK_XYZW ); +} + +inline void LLVector4a::setRotatedInv( const LLRotation& rot, const LLVector4a& vec ) +{ +	LLRotation inv; inv.setTranspose( rot ); +	setRotated( inv, vec ); +} + +inline void LLVector4a::setRotatedInv( const LLQuaternion2& quat, const LLVector4a& vec ) +{ +	LLQuaternion2 invRot; invRot.setConjugate( quat ); +	setRotated(invRot, vec); +} + +inline void LLVector4a::clamp( const LLVector4a& low, const LLVector4a& high ) +{ +	const LLVector4Logical highMask = greaterThan( high ); +	const LLVector4Logical lowMask = lessThan( low ); + +	setSelectWithMask( highMask, high, *this ); +	setSelectWithMask( lowMask, low, *this ); +} + + +//////////////////////////////////// +// LOGICAL +////////////////////////////////////	 +// The functions in this section will compare the elements in this vector +// to those in rhs and return an LLVector4Logical with all bits set in elements +// where the comparison was true and all bits unset in elements where the comparison +// was false. See llvector4logica.h +//////////////////////////////////// +// WARNING: Other than equals3 and equals4, these functions do NOT account +// for floating point tolerance. You should include the appropriate tolerance +// in the inputs. 
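The comparison functions implemented below return per-component all-ones/all-zeros masks that callers pair with setSelectWithMask(), exactly as clamp() does above. A minimal, hypothetical sketch that zeroes any negative components:

    // Illustrative only: branchless "clamp below at zero" via a comparison mask.
    inline void zero_negative_components(LLVector4a& v)
    {
        const LLVector4Logical neg = v.lessThan(LLVector4a::getZero());
        // Where the mask is set (component < 0) take zero, otherwise keep v.
        v.setSelectWithMask(neg, LLVector4a::getZero(), v);
    }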
+//////////////////////////////////// + +inline LLVector4Logical LLVector4a::greaterThan(const LLVector4a& rhs) const +{	 +	return _mm_cmpgt_ps(mQ, rhs.mQ); +} + +inline LLVector4Logical LLVector4a::lessThan(const LLVector4a& rhs) const +{ +	return _mm_cmplt_ps(mQ, rhs.mQ); +} + +inline LLVector4Logical LLVector4a::greaterEqual(const LLVector4a& rhs) const +{ +	return _mm_cmpge_ps(mQ, rhs.mQ); +} + +inline LLVector4Logical LLVector4a::lessEqual(const LLVector4a& rhs) const +{ +	return _mm_cmple_ps(mQ, rhs.mQ); +} + +inline LLVector4Logical LLVector4a::equal(const LLVector4a& rhs) const +{ +	return _mm_cmpeq_ps(mQ, rhs.mQ); +} + +// Returns true if this and rhs are componentwise equal up to the specified absolute tolerance +inline bool LLVector4a::equals4(const LLVector4a& rhs, F32 tolerance ) const +{ +	LLVector4a diff; diff.setSub( *this, rhs ); +	diff.setAbs( diff ); +	const LLQuad tol = _mm_set1_ps( tolerance ); +	const LLQuad cmp = _mm_cmplt_ps( diff, tol ); +	return (_mm_movemask_ps( cmp ) & LLVector4Logical::MASK_XYZW) == LLVector4Logical::MASK_XYZW; +} + +inline bool LLVector4a::equals3(const LLVector4a& rhs, F32 tolerance ) const +{ +	LLVector4a diff; diff.setSub( *this, rhs ); +	diff.setAbs( diff ); +	const LLQuad tol = _mm_set1_ps( tolerance ); +	const LLQuad t = _mm_cmplt_ps( diff, tol );  +	return (_mm_movemask_ps( t ) & LLVector4Logical::MASK_XYZ) == LLVector4Logical::MASK_XYZ; +	 +} + +//////////////////////////////////// +// OPERATORS +////////////////////////////////////	 + +// Do NOT add aditional operators without consulting someone with SSE experience +inline const LLVector4a& LLVector4a::operator= ( const LLVector4a& rhs ) +{ +	mQ = rhs.mQ; +	return *this; +} + +inline const LLVector4a& LLVector4a::operator= ( const LLQuad& rhs ) +{ +	mQ = rhs; +	return *this; +} + +inline LLVector4a::operator LLQuad() const +{ +	return mQ; +} diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h new file mode 100644 index 0000000000..dd66b09d43 --- /dev/null +++ b/indra/llmath/llvector4logical.h @@ -0,0 +1,124 @@ +/**  + * @file llvector4logical.h + * @brief LLVector4Logical class header file - Companion class to LLVector4a for logical and bit-twiddling operations + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef	LL_VECTOR4LOGICAL_H +#define	LL_VECTOR4LOGICAL_H + + +//////////////////////////// +// LLVector4Logical +//////////////////////////// +// This class is incomplete. If you need additional functionality, +// for example setting/unsetting particular elements or performing +// other boolean operations, feel free to implement. 
If you need +// assistance in determining the most optimal implementation, +// contact someone with SSE experience (Falcon, Richard, Davep, e.g.) +//////////////////////////// + +static LL_ALIGN_16(const U32 S_V4LOGICAL_MASK_TABLE[4*4]) = +{ +	0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000, +	0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000, +	0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000, +	0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF +}; + +class LLVector4Logical +{ +public: +	 +	enum { +		MASK_X = 1, +		MASK_Y = 1 << 1, +		MASK_Z = 1 << 2, +		MASK_W = 1 << 3, +		MASK_XYZ = MASK_X | MASK_Y | MASK_Z, +		MASK_XYZW = MASK_XYZ | MASK_W +	}; +	 +	// Empty default ctor +	LLVector4Logical() {} +	 +	LLVector4Logical( const LLQuad& quad ) +	{ +		mQ = quad; +	} +	 +	// Create and return a mask consisting of the lowest order bit of each element +	inline U32 getGatheredBits() const +	{ +		return _mm_movemask_ps(mQ); +	};	 +	 +	// Invert this mask +	inline LLVector4Logical& invert() +	{ +		static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; +		mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); +		return *this; +	} +	 +	inline LLBool32 areAllSet( U32 mask ) const +	{ +		return ( getGatheredBits() & mask) == mask; +	} +	 +	inline LLBool32 areAllSet() const +	{ +		return areAllSet( MASK_XYZW ); +	} +		 +	inline LLBool32 areAnySet( U32 mask ) const +	{ +		return getGatheredBits() & mask; +	} +	 +	inline LLBool32 areAnySet() const +	{ +		return areAnySet( MASK_XYZW ); +	} +	 +	inline operator LLQuad() const +	{ +		return mQ; +	} + +	inline void clear()  +	{ +		mQ = _mm_setzero_ps(); +	} + +	template<int N> void setElement() +	{ +		mQ = _mm_or_ps( mQ, *reinterpret_cast<const LLQuad*>(S_V4LOGICAL_MASK_TABLE + 4*N) ); +	} +	 +private: +	 +	LLQuad mQ; +}; + +#endif //LL_VECTOR4ALOGICAL_H diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 14e1ca8d43..cc9744756f 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -1,4 +1,5 @@  /**  +   * @file llvolume.cpp   *   * $LicenseInfo:firstyear=2002&license=viewerlgpl$ @@ -24,9 +25,14 @@   */  #include "linden_common.h" +#include "llmemory.h"  #include "llmath.h"  #include <set> +#if !LL_WINDOWS +#include <stdint.h> +#endif +#include <cmath>  #include "llerror.h"  #include "llmemtype.h" @@ -37,9 +43,16 @@  #include "v4math.h"  #include "m4math.h"  #include "m3math.h" +#include "llmatrix3a.h" +#include "lloctree.h"  #include "lldarray.h"  #include "llvolume.h" +#include "llvolumeoctree.h"  #include "llstl.h" +#include "llsdserialize.h" +#include "llvector4a.h" +#include "llmatrix4a.h" +#include "lltimer.h"  #define DEBUG_SILHOUETTE_BINORMALS 0  #define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette @@ -80,7 +93,18 @@ const F32 SKEW_MAX	=  0.95f;  const F32 SCULPT_MIN_AREA = 0.002f;  const S32 SCULPT_MIN_AREA_DETAIL = 1; -#define GEN_TRI_STRIP 0 +extern BOOL gDebugGL; + +void assert_aligned(void* ptr, uintptr_t alignment) +{ +#if 0 +	uintptr_t t = (uintptr_t) ptr; +	if (t%alignment != 0) +	{ +		llerrs << "Alignment check failed." 
<< llendl; +	} +#endif +}  BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)  {     @@ -99,128 +123,262 @@ BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLV  BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size)  { -	float fAWdU[3]; -	LLVector3 dir; -	LLVector3 diff; +	return LLLineSegmentBoxIntersect(start.mV, end.mV, center.mV, size.mV); +} + +BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size) +{ +	F32 fAWdU[3]; +	F32 dir[3]; +	F32 diff[3];  	for (U32 i = 0; i < 3; i++)  	{ -		dir.mV[i] = 0.5f * (end.mV[i] - start.mV[i]); -		diff.mV[i] = (0.5f * (end.mV[i] + start.mV[i])) - center.mV[i]; -		fAWdU[i] = fabsf(dir.mV[i]); -		if(fabsf(diff.mV[i])>size.mV[i] + fAWdU[i]) return false; +		dir[i] = 0.5f * (end[i] - start[i]); +		diff[i] = (0.5f * (end[i] + start[i])) - center[i]; +		fAWdU[i] = fabsf(dir[i]); +		if(fabsf(diff[i])>size[i] + fAWdU[i]) return false;  	}  	float f; -	f = dir.mV[1] * diff.mV[2] - dir.mV[2] * diff.mV[1];    if(fabsf(f)>size.mV[1]*fAWdU[2] + size.mV[2]*fAWdU[1])  return false; -	f = dir.mV[2] * diff.mV[0] - dir.mV[0] * diff.mV[2];    if(fabsf(f)>size.mV[0]*fAWdU[2] + size.mV[2]*fAWdU[0])  return false; -	f = dir.mV[0] * diff.mV[1] - dir.mV[1] * diff.mV[0];    if(fabsf(f)>size.mV[0]*fAWdU[1] + size.mV[1]*fAWdU[0])  return false; +	f = dir[1] * diff[2] - dir[2] * diff[1];    if(fabsf(f)>size[1]*fAWdU[2] + size[2]*fAWdU[1])  return false; +	f = dir[2] * diff[0] - dir[0] * diff[2];    if(fabsf(f)>size[0]*fAWdU[2] + size[2]*fAWdU[0])  return false; +	f = dir[0] * diff[1] - dir[1] * diff[0];    if(fabsf(f)>size[0]*fAWdU[1] + size[1]*fAWdU[0])  return false;  	return true;  } +  // intersect test between triangle vert0, vert1, vert2 and a ray from orig in direction dir.  // returns TRUE if intersecting and returns barycentric coordinates in intersection_a, intersection_b,  // and returns the intersection point along dir in intersection_t.  
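The rework below replaces the old two_sided flag with separate one-sided and two-sided LLVector4a routines that return the barycentrics by reference, while the LLVector3 helper added further down keeps the flag for unaligned callers. A hypothetical caller of that helper, with illustrative names only:

    // Illustrative only: test a ray from 'origin' along 'direction' against one triangle.
    BOOL ray_hits_front_face(const LLVector3& v0, const LLVector3& v1, const LLVector3& v2,
                             const LLVector3& origin, const LLVector3& direction)
    {
        F32 a, b, t;  // barycentric coordinates and distance along 'direction'
        // two_sided = FALSE: back-facing triangles are rejected by the determinant test.
        return LLTriangleRayIntersect(v0, v1, v2, origin, direction, a, b, t, FALSE);
    }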
// Moller-Trumbore algorithm -BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, -							F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided) +BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, +							F32& intersection_a, F32& intersection_b, F32& intersection_t)  { -	F32 u, v, t;  	/* find vectors for two edges sharing vert0 */ -	LLVector3 edge1 = vert1 - vert0; +	LLVector4a edge1; +	edge1.setSub(vert1, vert0); -	LLVector3 edge2 = vert2 - vert0;; +	LLVector4a edge2; +	edge2.setSub(vert2, vert0);  	/* begin calculating determinant - also used to calculate U parameter */ -	LLVector3 pvec = dir % edge2; -	 -	/* if determinant is near zero, ray lies in plane of triangle */ -	F32 det = edge1 * pvec; +	LLVector4a pvec; +	pvec.setCross3(dir, edge2); -	if (!two_sided) +	/* if determinant is near zero, ray lies in plane of triangle */ +	LLVector4a det; +	det.setAllDot3(edge1, pvec); +	 +	if (det.greaterEqual(LLVector4a::getEpsilon()).getGatheredBits() & 0x7)  	{ -		if (det < F_APPROXIMATELY_ZERO) -		{ -			return FALSE; -		} -  		/* calculate distance from vert0 to ray origin */ -		LLVector3 tvec = orig - vert0; +		LLVector4a tvec; +		tvec.setSub(orig, vert0);  		/* calculate U parameter and test bounds */ -		u = tvec * pvec;	 +		LLVector4a u; +		u.setAllDot3(tvec,pvec); -		if (u < 0.f || u > det) +		if ((u.greaterEqual(LLVector4a::getZero()).getGatheredBits() & 0x7) && +			(u.lessEqual(det).getGatheredBits() & 0x7))  		{ -			return FALSE; +			/* prepare to test V parameter */ +			LLVector4a qvec; +			qvec.setCross3(tvec, edge1); +			 +			/* calculate V parameter and test bounds */ +			LLVector4a v; +			v.setAllDot3(dir, qvec); + +			 +			//if (!(v < 0.f || u + v > det)) + +			LLVector4a sum_uv; +			sum_uv.setAdd(u, v); + +			S32 v_gequal = v.greaterEqual(LLVector4a::getZero()).getGatheredBits() & 0x7; +			S32 sum_lequal = sum_uv.lessEqual(det).getGatheredBits() & 0x7; + +			if (v_gequal  && sum_lequal) +			{ +				/* calculate t, scale parameters, ray intersects triangle */ +				LLVector4a t; +				t.setAllDot3(edge2,qvec); + +				t.div(det); +				u.div(det); +				v.div(det); +				 +				intersection_a = u[0]; +				intersection_b = v[0]; +				intersection_t = t[0]; +				return TRUE; +			}  		} -	 -		/* prepare to test V parameter */ -		LLVector3 qvec = tvec % edge1; +	} -		/* calculate V parameter and test bounds */ -		v = dir * qvec; -		if (v < 0.f || u + v > det) -		{ -			return FALSE; -		} +	return FALSE; +}  -		/* calculate t, scale parameters, ray intersects triangle */ -		t = edge2 * qvec; -		F32 inv_det = 1.0 / det; -		t *= inv_det; -		u *= inv_det; -		v *= inv_det; -	} +BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, +							F32& intersection_a, F32& intersection_b, F32& intersection_t) +{ +	F32 u, v, t; -	else // two sided -			{ -		if (det > -F_APPROXIMATELY_ZERO && det < F_APPROXIMATELY_ZERO) -				{ -			return FALSE; -				} -		F32 inv_det = 1.0 / det; +	/* find vectors for two edges sharing vert0 */ +	LLVector4a edge1; +	edge1.setSub(vert1, vert0); +	 +	 +	LLVector4a edge2; +	edge2.setSub(vert2, vert0); -		/* calculate distance from vert0 to ray origin */ -		LLVector3 tvec = orig - vert0; -		 -		/* calculate U parameter and test bounds */ -		u = (tvec * pvec) * 
inv_det; -		if (u < 0.f || u > 1.f) -		{ -			return FALSE; -			} +	/* begin calculating determinant - also used to calculate U parameter */ +	LLVector4a pvec; +	pvec.setCross3(dir, edge2); -		/* prepare to test V parameter */ -		LLVector3 qvec = tvec - edge1; -		 -		/* calculate V parameter and test bounds */ -		v = (dir * qvec) * inv_det; -		 -		if (v < 0.f || u + v > 1.f) -		{ -			return FALSE; -		} +	/* if determinant is near zero, ray lies in plane of triangle */ +	F32 det = edge1.dot3(pvec).getF32(); + +	 +	if (det > -F_APPROXIMATELY_ZERO && det < F_APPROXIMATELY_ZERO) +	{ +		return FALSE; +	} + +	F32 inv_det = 1.f / det; + +	/* calculate distance from vert0 to ray origin */ +	LLVector4a tvec; +	tvec.setSub(orig, vert0); +	 +	/* calculate U parameter and test bounds */ +	u = (tvec.dot3(pvec).getF32()) * inv_det; +	if (u < 0.f || u > 1.f) +	{ +		return FALSE; +	} -		/* calculate t, ray intersects triangle */ -		t = (edge2 * qvec) * inv_det; +	/* prepare to test V parameter */ +	tvec.sub(edge1); +		 +	/* calculate V parameter and test bounds */ +	v = (dir.dot3(tvec).getF32()) * inv_det; +	 +	if (v < 0.f || u + v > 1.f) +	{ +		return FALSE;  	} + +	/* calculate t, ray intersects triangle */ +	t = (edge2.dot3(tvec).getF32()) * inv_det; -	if (intersection_a != NULL) -		*intersection_a = u; -	if (intersection_b != NULL) -		*intersection_b = v; -	if (intersection_t != NULL) -		*intersection_t = t; +	intersection_a = u; +	intersection_b = v; +	intersection_t = t;  	return TRUE;  }  +//helper for non-aligned vectors +BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, +							F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided) +{ +	LLVector4a vert0a, vert1a, vert2a, origa, dira; +	vert0a.load3(vert0.mV); +	vert1a.load3(vert1.mV); +	vert2a.load3(vert2.mV); +	origa.load3(orig.mV); +	dira.load3(dir.mV); + +	if (two_sided) +	{ +		return LLTriangleRayIntersectTwoSided(vert0a, vert1a, vert2a, origa, dira,  +				intersection_a, intersection_b, intersection_t); +	} +	else +	{ +		return LLTriangleRayIntersect(vert0a, vert1a, vert2a, origa, dira,  +				intersection_a, intersection_b, intersection_t); +	} +} + +class LLVolumeOctreeRebound : public LLOctreeTravelerDepthFirst<LLVolumeTriangle> +{ +public: +	const LLVolumeFace* mFace; + +	LLVolumeOctreeRebound(const LLVolumeFace* face) +	{ +		mFace = face; +	} + +	virtual void visit(const LLOctreeNode<LLVolumeTriangle>* branch) +	{ //this is a depth first traversal, so it's safe to assum all children have complete +		//bounding data + +		LLVolumeOctreeListener* node = (LLVolumeOctreeListener*) branch->getListener(0); + +		LLVector4a& min = node->mExtents[0]; +		LLVector4a& max = node->mExtents[1]; + +		if (!branch->getData().empty()) +		{ //node has data, find AABB that binds data set +			const LLVolumeTriangle* tri = *(branch->getData().begin()); +			 +			//initialize min/max to first available vertex +			min = *(tri->mV[0]); +			max = *(tri->mV[0]); +			 +			for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter =  +				branch->getData().begin(); iter != branch->getData().end(); ++iter) +			{ //for each triangle in node + +				//stretch by triangles in node +				tri = *iter; +				 +				min.setMin(min, *tri->mV[0]); +				min.setMin(min, *tri->mV[1]); +				min.setMin(min, *tri->mV[2]); + +				max.setMax(max, *tri->mV[0]); +				max.setMax(max, *tri->mV[1]); +				max.setMax(max, *tri->mV[2]); +			} +		} +		else if 
(!branch->getChildren().empty()) +		{ //no data, but child nodes exist +			LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0); + +			//initialize min/max to extents of first child +			min = child->mExtents[0]; +			max = child->mExtents[1]; +		} +		else +		{ +			llerrs << "Empty leaf" << llendl; +		} + +		for (S32 i = 0; i < branch->getChildCount(); ++i) +		{  //stretch by child extents +			LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0); +			min.setMin(min, child->mExtents[0]); +			max.setMax(max, child->mExtents[1]); +		} + +		node->mBounds[0].setAdd(min, max); +		node->mBounds[0].mul(0.5f); + +		node->mBounds[1].setSub(max,min); +		node->mBounds[1].mul(0.5f); +	} +};  //-------------------------------------------------------------------  // statics @@ -259,6 +417,70 @@ LLProfile::Face* LLProfile::addFace(S32 i, S32 count, F32 scaleU, S16 faceID, BO  	return face;  } +//static +S32 LLProfile::getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset, F32 bevel, F32 ang_scale, S32 split) +{ // this is basically LLProfile::genNGon stripped down to only the operations that influence the number of points +	LLMemType m1(LLMemType::MTYPE_VOLUME); +	S32 np = 0; + +	// Generate an n-sided "circular" path. +	// 0 is (1,0), and we go counter-clockwise along a circular path from there. +	F32 t, t_step, t_first, t_fraction; +	 +	F32 begin  = params.getBegin(); +	F32 end    = params.getEnd(); + +	t_step = 1.0f / sides; +	 +	t_first = floor(begin * sides) / (F32)sides; + +	// pt1 is the first point on the fractional face. +	// Starting t and ang values for the first face +	t = t_first; +	 +	// Increment to the next point. +	// pt2 is the end point on the fractional face +	t += t_step; +	 +	t_fraction = (begin - t_first)*sides; + +	// Only use if it's not almost exactly on an edge. +	if (t_fraction < 0.9999f) +	{ +		np++; +	} + +	// There's lots of potential here for floating point error to generate unneeded extra points - DJS 04/05/02 +	while (t < end) +	{ +		// Iterate through all the integer steps of t. +		np++; + +		t += t_step; +	} + +	t_fraction = (end - (t - t_step))*sides; + +	// Find the fraction that we need to add to the end point. +	t_fraction = (end - (t - t_step))*sides; +	if (t_fraction > 0.0001f) +	{ +		np++; +	} + +	// If we're sliced, the profile is open. +	if ((end - begin)*ang_scale < 0.99f) +	{ +		if (params.getHollow() <= 0) +		{ +			// put center point if not hollow. +			np++; +		} +	} +	 +	return np; +} +  // What is the bevel parameter used for? 
- DJS 04/05/02  // Bevel parameter is currently unused but presumedly would support  // filleted and chamfered corners @@ -515,6 +737,117 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3  	return face;  } +//static +S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split, +						 BOOL is_sculpted, S32 sculpt_size) +{ // this is basically LLProfile::generate stripped down to only operations that influence the number of points +	LLMemType m1(LLMemType::MTYPE_VOLUME); +	 +	if (detail < MIN_LOD) +	{ +		detail = MIN_LOD; +	} + +	// Generate the face data +	F32 hollow = params.getHollow(); + +	S32 np = 0; + +	switch (params.getCurveType() & LL_PCODE_PROFILE_MASK) +	{ +	case LL_PCODE_PROFILE_SQUARE: +		{ +			np = getNumNGonPoints(params, 4,-0.375, 0, 1, split); +		 +			if (hollow) +			{ +				np *= 2; +			} +		} +		break; +	case  LL_PCODE_PROFILE_ISOTRI: +	case  LL_PCODE_PROFILE_RIGHTTRI: +	case  LL_PCODE_PROFILE_EQUALTRI: +		{ +			np = getNumNGonPoints(params, 3,0, 0, 1, split); +						 +			if (hollow) +			{ +				np *= 2; +			} +		} +		break; +	case LL_PCODE_PROFILE_CIRCLE: +		{ +			// If this has a square hollow, we should adjust the +			// number of faces a bit so that the geometry lines up. +			U8 hole_type=0; +			F32 circle_detail = MIN_DETAIL_FACES * detail; +			if (hollow) +			{ +				hole_type = params.getCurveType() & LL_PCODE_HOLE_MASK; +				if (hole_type == LL_PCODE_HOLE_SQUARE) +				{ +					// Snap to the next multiple of four sides, +					// so that corners line up. +					circle_detail = llceil(circle_detail / 4.0f) * 4.0f; +				} +			} + +			S32 sides = (S32)circle_detail; + +			if (is_sculpted) +				sides = sculpt_size; +			 +			np = getNumNGonPoints(params, sides); +			 +			if (hollow) +			{ +				np *= 2; +			} +		} +		break; +	case LL_PCODE_PROFILE_CIRCLE_HALF: +		{ +			// If this has a square hollow, we should adjust the +			// number of faces a bit so that the geometry lines up. +			U8 hole_type=0; +			// Number of faces is cut in half because it's only a half-circle. +			F32 circle_detail = MIN_DETAIL_FACES * detail * 0.5f; +			if (hollow) +			{ +				hole_type = params.getCurveType() & LL_PCODE_HOLE_MASK; +				if (hole_type == LL_PCODE_HOLE_SQUARE) +				{ +					// Snap to the next multiple of four sides (div 2), +					// so that corners line up. +					circle_detail = llceil(circle_detail / 2.0f) * 2.0f; +				} +			} +			np = getNumNGonPoints(params, llfloor(circle_detail), 0.5f, 0.f, 0.5f); +			 +			if (hollow) +			{ +				np *= 2; +			} + +			// Special case for openness of sphere +			if ((params.getEnd() - params.getBegin()) < 1.f) +			{ +			} +			else if (!hollow) +			{ +				np++; +			} +		} +		break; +	default: +	   break; +	}; + +	 +	return np; +}  BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split, @@ -976,6 +1309,32 @@ LLPath::~LLPath()  {  } +S32 LLPath::getNumNGonPoints(const LLPathParams& params, S32 sides, F32 startOff, F32 end_scale, F32 twist_scale) +{ //this is basically LLPath::genNGon stripped down to only operations that influence the number of points added +	S32 ret = 0; + +	F32 step= 1.0f / sides; +	F32 t	= params.getBegin(); +	ret = 1; +	 +	t+=step; + +	// Snap to a quantized parameter, so that cut does not +	// affect most sample points. +	t = ((S32)(t * sides)) / (F32)sides; + +	// Run through the non-cut dependent points. 
+	while (t < params.getEnd()) +	{ +		ret++; +		t+=step; +	} + +	ret++; + +	return ret; +} +  void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 end_scale, F32 twist_scale)  {  	// Generates a circular path, starting at (1, 0, 0), counterclockwise along the xz plane. @@ -1153,6 +1512,56 @@ const LLVector2 LLPathParams::getEndScale() const  	return end_scale;  } +S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail) +{ // this is basically LLPath::generate stripped down to only the operations that influence the number of points +	LLMemType m1(LLMemType::MTYPE_VOLUME); +	 +	if (detail < MIN_LOD) +	{ +		detail = MIN_LOD; +	} + +	S32 np = 2; // hardcode for line + +	// Is this 0xf0 mask really necessary?  DK 03/02/05 + +	switch (params.getCurveType() & 0xf0) +	{ +	default: +	case LL_PCODE_PATH_LINE: +		{ +			// Take the begin/end twist into account for detail. +			np    = llfloor(fabs(params.getTwistBegin() - params.getTwist()) * 3.5f * (detail-0.5f)) + 2; +		} +		break; + +	case LL_PCODE_PATH_CIRCLE: +		{ +			// Increase the detail as the revolutions and twist increase. +			F32 twist_mag = fabs(params.getTwistBegin() - params.getTwist()); + +			S32 sides = (S32)llfloor(llfloor((MIN_DETAIL_FACES * detail + twist_mag * 3.5f * (detail-0.5f))) * params.getRevolutions()); + +			np = sides; +		} +		break; + +	case LL_PCODE_PATH_CIRCLE2: +		{ +			//genNGon(params, llfloor(MIN_DETAIL_FACES * detail), 4.f, 0.f); +			np = getNumNGonPoints(params, llfloor(MIN_DETAIL_FACES * detail)); +		} +		break; + +	case LL_PCODE_PATH_TEST: + +		np     = 5; +		break; +	}; + +	return np; +} +  BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,  					  BOOL is_sculpted, S32 sculpt_size)  { @@ -1669,7 +2078,14 @@ LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL ge  	mFaceMask = 0x0;  	mDetail = detail;  	mSculptLevel = -2; -	 +	mSurfaceArea = 1.f; //only calculated for sculpts, defaults to 1 for all other prims +	mIsMeshAssetLoaded = FALSE; +	mLODScaleBias.setVec(1,1,1); +	mHullPoints = NULL; +	mHullIndices = NULL; +	mNumHullPoints = 0; +	mNumHullIndices = 0; +  	// set defaults  	if (mParams.getPathParams().getCurveType() == LL_PCODE_PATH_FLEXIBLE)  	{ @@ -1684,7 +2100,8 @@ LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL ge  	mGenerateSingleFace = generate_single_face;  	generate(); -	if (mParams.getSculptID().isNull() && params.getSculptType() == LL_SCULPT_TYPE_NONE) +	 +	if (mParams.getSculptID().isNull() && mParams.getSculptType() == LL_SCULPT_TYPE_NONE || mParams.getSculptType() == LL_SCULPT_TYPE_MESH)  	{  		createVolumeFaces();  	} @@ -1719,6 +2136,11 @@ LLVolume::~LLVolume()  	mPathp = NULL;  	mProfilep = NULL;  	mVolumeFaces.clear(); + +	ll_aligned_free_16(mHullPoints); +	mHullPoints = NULL; +	ll_aligned_free_16(mHullIndices); +	mHullIndices = NULL;  }  BOOL LLVolume::generate() @@ -1835,6 +2257,487 @@ BOOL LLVolume::generate()  	return FALSE;  } +void LLVolumeFace::VertexData::init() +{ +	if (!mData) +	{ +		mData = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*2); +	} +} + +LLVolumeFace::VertexData::VertexData() +{ +	mData = NULL; +	init(); +} +	 +LLVolumeFace::VertexData::VertexData(const VertexData& rhs) +{ +	mData = NULL; +	*this = rhs; +} + +const LLVolumeFace::VertexData& LLVolumeFace::VertexData::operator=(const LLVolumeFace::VertexData& rhs) +{ +	if (this != &rhs) +	{ +		init(); +		LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 2*sizeof(LLVector4a)); +		mTexCoord = 
rhs.mTexCoord; +	} +	return *this; +} + +LLVolumeFace::VertexData::~VertexData() +{ +	ll_aligned_free_16(mData); +	mData = NULL; +} + +LLVector4a& LLVolumeFace::VertexData::getPosition() +{ +	return mData[POSITION]; +} + +LLVector4a& LLVolumeFace::VertexData::getNormal() +{ +	return mData[NORMAL]; +} + +const LLVector4a& LLVolumeFace::VertexData::getPosition() const +{ +	return mData[POSITION]; +} + +const LLVector4a& LLVolumeFace::VertexData::getNormal() const +{ +	return mData[NORMAL]; +} + + +void LLVolumeFace::VertexData::setPosition(const LLVector4a& pos) +{ +	mData[POSITION] = pos; +} + +void LLVolumeFace::VertexData::setNormal(const LLVector4a& norm) +{ +	mData[NORMAL] = norm; +} + +bool LLVolumeFace::VertexData::operator<(const LLVolumeFace::VertexData& rhs)const +{ +	const F32* lp = this->getPosition().getF32ptr(); +	const F32* rp = rhs.getPosition().getF32ptr(); + +	if (lp[0] != rp[0]) +	{ +		return lp[0] < rp[0]; +	} + +	if (rp[1] != lp[1]) +	{ +		return lp[1] < rp[1]; +	} + +	if (rp[2] != lp[2]) +	{ +		return lp[2] < rp[2]; +	} + +	lp = getNormal().getF32ptr(); +	rp = rhs.getNormal().getF32ptr(); + +	if (lp[0] != rp[0]) +	{ +		return lp[0] < rp[0]; +	} + +	if (rp[1] != lp[1]) +	{ +		return lp[1] < rp[1]; +	} + +	if (rp[2] != lp[2]) +	{ +		return lp[2] < rp[2]; +	} + +	if (mTexCoord.mV[0] != rhs.mTexCoord.mV[0]) +	{ +		return mTexCoord.mV[0] < rhs.mTexCoord.mV[0]; +	} + +	return mTexCoord.mV[1] < rhs.mTexCoord.mV[1]; +} + +bool LLVolumeFace::VertexData::operator==(const LLVolumeFace::VertexData& rhs)const +{ +	return mData[POSITION].equals3(rhs.getPosition()) && +			mData[NORMAL].equals3(rhs.getNormal()) && +			mTexCoord == rhs.mTexCoord; +} + +bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs, F32 angle_cutoff) const +{ +	bool retval = false; + +	const F32 epsilon = 0.00001f; + +	if (rhs.mData[POSITION].equals3(mData[POSITION], epsilon) &&  +		fabs(rhs.mTexCoord[0]-mTexCoord[0]) < epsilon && +		fabs(rhs.mTexCoord[1]-mTexCoord[1]) < epsilon) +	{ +		if (angle_cutoff > 1.f) +		{ +			retval = (mData[NORMAL].equals3(rhs.mData[NORMAL], epsilon)); +		} +		else +		{ +			F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]).getF32(); +			retval = cur_angle > angle_cutoff; +		} +	} + +	return retval; +} + +bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) +{ +	//input stream is now pointing at a zlib compressed block of LLSD +	//decompress block +	LLSD mdl; +	if (!unzip_llsd(mdl, is, size)) +	{ +		LL_DEBUGS("MeshStreaming") << "Failed to unzip LLSD blob for LoD, will probably fetch from sim again." << llendl; +		return false; +	} +	 +	{ +		U32 face_count = mdl.size(); + +		if (face_count == 0) +		{ //no faces unpacked, treat as failed decode +			llwarns << "found no faces!" 
<< llendl; +			return false; +		} + +		mVolumeFaces.resize(face_count); + +		for (U32 i = 0; i < face_count; ++i) +		{ +			LLVolumeFace& face = mVolumeFaces[i]; + +			if (mdl[i].has("NoGeometry")) +			{ //face has no geometry, continue +				face.resizeIndices(3); +				face.resizeVertices(1); +				memset(face.mPositions, 0, sizeof(LLVector4a)); +				memset(face.mNormals, 0, sizeof(LLVector4a)); +				memset(face.mTexCoords, 0, sizeof(LLVector2)); +				memset(face.mIndices, 0, sizeof(U16)*3); +				continue; +			} + +			LLSD::Binary pos = mdl[i]["Position"]; +			LLSD::Binary norm = mdl[i]["Normal"]; +			LLSD::Binary tc = mdl[i]["TexCoord0"]; +			LLSD::Binary idx = mdl[i]["TriangleList"]; + +			 + +			//copy out indices +			face.resizeIndices(idx.size()/2); +			 +			if (idx.empty() || face.mNumIndices < 3) +			{ //why is there an empty index list? +				llwarns <<"Empty face present!" << llendl; +				continue; +			} + +			U16* indices = (U16*) &(idx[0]); +			U32 count = idx.size()/2; +			for (U32 j = 0; j < count; ++j) +			{ +				face.mIndices[j] = indices[j]; +			} + +			//copy out vertices +			U32 num_verts = pos.size()/(3*2); +			face.resizeVertices(num_verts); + +			LLVector3 minp; +			LLVector3 maxp; +			LLVector2 min_tc;  +			LLVector2 max_tc;  +		 +			minp.setValue(mdl[i]["PositionDomain"]["Min"]); +			maxp.setValue(mdl[i]["PositionDomain"]["Max"]); +			LLVector4a min_pos, max_pos; +			min_pos.load3(minp.mV); +			max_pos.load3(maxp.mV); + +			min_tc.setValue(mdl[i]["TexCoord0Domain"]["Min"]); +			max_tc.setValue(mdl[i]["TexCoord0Domain"]["Max"]); + +			LLVector4a pos_range; +			pos_range.setSub(max_pos, min_pos); +			LLVector2 tc_range2 = max_tc - min_tc; +			LLVector4a tc_range; +			tc_range.set(tc_range2[0], tc_range2[1], tc_range2[0], tc_range2[1]); +			LLVector4a min_tc4(min_tc[0], min_tc[1], min_tc[0], min_tc[1]); + +			LLVector4a* pos_out = face.mPositions; +			LLVector4a* norm_out = face.mNormals; +			LLVector4a* tc_out = (LLVector4a*) face.mTexCoords; + +			{ +				U16* v = (U16*) &(pos[0]); +				for (U32 j = 0; j < num_verts; ++j) +				{ +					pos_out->set((F32) v[0], (F32) v[1], (F32) v[2]); +					pos_out->div(65535.f); +					pos_out->mul(pos_range); +					pos_out->add(min_pos); +					pos_out++; +					v += 3; +				} + +			} + +			{ +				if (!norm.empty()) +				{ +					U16* n = (U16*) &(norm[0]); +					for (U32 j = 0; j < num_verts; ++j) +					{ +						norm_out->set((F32) n[0], (F32) n[1], (F32) n[2]); +						norm_out->div(65535.f); +						norm_out->mul(2.f); +						norm_out->sub(1.f); +						norm_out++; +						n += 3; +					} +				} +				else +				{ +					memset(norm_out, 0, sizeof(LLVector4a)*num_verts); +				} +			} + +			{ +				if (!tc.empty()) +				{ +					U16* t = (U16*) &(tc[0]); +					for (U32 j = 0; j < num_verts; j+=2) +					{ +						if (j < num_verts-1) +						{ +							tc_out->set((F32) t[0], (F32) t[1], (F32) t[2], (F32) t[3]); +						} +						else +						{ +							tc_out->set((F32) t[0], (F32) t[1], 0.f, 0.f); +						} + +						t += 4; + +						tc_out->div(65535.f); +						tc_out->mul(tc_range); +						tc_out->add(min_tc4); + +						tc_out++; +					} +				} +				else +				{ +					memset(tc_out, 0, sizeof(LLVector2)*num_verts); +				} +			} + +			if (mdl[i].has("Weights")) +			{ +				face.allocateWeights(num_verts); + +				LLSD::Binary weights = mdl[i]["Weights"]; + +				U32 idx = 0; + +				U32 cur_vertex = 0; +				while (idx < weights.size() && cur_vertex < num_verts) +				{ +					const U8 END_INFLUENCES = 0xFF; +					U8 joint = weights[idx++]; + +					U32 
cur_influence = 0; +					LLVector4 wght(0,0,0,0); + +					while (joint != END_INFLUENCES && idx < weights.size()) +					{ +						U16 influence = weights[idx++]; +						influence |= ((U16) weights[idx++] << 8); + +						F32 w = llclamp((F32) influence / 65535.f, 0.f, 0.99999f); +						wght.mV[cur_influence++] = (F32) joint + w; + +						if (cur_influence >= 4) +						{ +							joint = END_INFLUENCES; +						} +						else +						{ +							joint = weights[idx++]; +						} +					} + +					face.mWeights[cur_vertex].loadua(wght.mV); + +					cur_vertex++; +				} + +				if (cur_vertex != num_verts || idx != weights.size()) +				{ +					llwarns << "Vertex weight count does not match vertex count!" << llendl; +				} +					 +			} + +			// modifier flags? +			bool do_mirror = (mParams.getSculptType() & LL_SCULPT_FLAG_MIRROR); +			bool do_invert = (mParams.getSculptType() &LL_SCULPT_FLAG_INVERT); +			 +			 +			// translate to actions: +			bool do_reflect_x = false; +			bool do_reverse_triangles = false; +			bool do_invert_normals = false; +			 +			if (do_mirror) +			{ +				do_reflect_x = true; +				do_reverse_triangles = !do_reverse_triangles; +			} +			 +			if (do_invert) +			{ +				do_invert_normals = true; +				do_reverse_triangles = !do_reverse_triangles; +			} +			 +			// now do the work + +			if (do_reflect_x) +			{ +				LLVector4a* p = (LLVector4a*) face.mPositions; +				LLVector4a* n = (LLVector4a*) face.mNormals; +				 +				for (S32 i = 0; i < face.mNumVertices; i++) +				{ +					p[i].mul(-1.0f); +					n[i].mul(-1.0f); +				} +			} + +			if (do_invert_normals) +			{ +				LLVector4a* n = (LLVector4a*) face.mNormals; +				 +				for (S32 i = 0; i < face.mNumVertices; i++) +				{ +					n[i].mul(-1.0f); +				} +			} + +			if (do_reverse_triangles) +			{ +				for (U32 j = 0; j < face.mNumIndices; j += 3) +				{ +					// swap the 2nd and 3rd index +					S32 swap = face.mIndices[j+1]; +					face.mIndices[j+1] = face.mIndices[j+2]; +					face.mIndices[j+2] = swap; +				} +			} + +			//calculate bounding box +			LLVector4a& min = face.mExtents[0]; +			LLVector4a& max = face.mExtents[1]; + +			if (face.mNumVertices < 3) +			{ //empty face, use a dummy 1cm (at 1m scale) bounding box +				min.splat(-0.005f); +				max.splat(0.005f); +			} +			else +			{ +				min = max = face.mPositions[0]; + +				for (S32 i = 1; i < face.mNumVertices; ++i) +				{ +					min.setMin(min, face.mPositions[i]); +					max.setMax(max, face.mPositions[i]); +				} + +				if (face.mTexCoords) +				{ +					LLVector2& min_tc = face.mTexCoordExtents[0]; +					LLVector2& max_tc = face.mTexCoordExtents[1]; + +					min_tc = face.mTexCoords[0]; +					max_tc = face.mTexCoords[0]; + +					for (U32 j = 1; j < face.mNumVertices; ++j) +					{ +						update_min_max(min_tc, max_tc, face.mTexCoords[j]); +					} +				} +				else +				{ +					face.mTexCoordExtents[0].set(0,0); +					face.mTexCoordExtents[1].set(1,1); +				} +			} +		} +	} +	 +	mSculptLevel = 0;  // success! + +	cacheOptimize(); + +	return true; +} + + +BOOL LLVolume::isMeshAssetLoaded() +{ +	return mIsMeshAssetLoaded; +} + +void LLVolume::setMeshAssetLoaded(BOOL loaded) +{ +	mIsMeshAssetLoaded = loaded; +} + +void LLVolume::copyVolumeFaces(const LLVolume* volume) +{ +	mVolumeFaces = volume->mVolumeFaces; +	mSculptLevel = 0; +} + +void LLVolume::cacheOptimize() +{ +	for (S32 i = 0; i < mVolumeFaces.size(); ++i) +	{ +		mVolumeFaces[i].cacheOptimize(); +	} +} + + +S32	LLVolume::getNumFaces() const +{ +	return mIsMeshAssetLoaded ? 
getNumVolumeFaces() : (S32)mProfilep->mFaces.size(); +} +  void LLVolume::createVolumeFaces()  { @@ -2001,7 +2904,7 @@ F32 LLVolume::sculptGetSurfaceArea()  			// compute the area of the quad by taking the length of the cross product of the two triangles  			LLVector3 cross1 = (p1 - p2) % (p1 - p3);  			LLVector3 cross2 = (p4 - p2) % (p4 - p3); -			area += (cross1.magVec() + cross2.magVec()) / 2.0; +			area += (cross1.magVec() + cross2.magVec()) / 2.f;  		}  	} @@ -2188,7 +3091,7 @@ void sculpt_calc_mesh_resolution(U16 width, U16 height, U8 type, F32 detail, S32  		ratio = (F32) width / (F32) height; -	s = (S32)fsqrtf(((F32)vertices / ratio)); +	s = (S32)(F32) sqrt(((F32)vertices / ratio));  	s = llmax(s, 4);              // no degenerate sizes, please  	t = vertices / s; @@ -2240,7 +3143,13 @@ void LLVolume::sculpt(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components,  		// don't test lowest LOD to support legacy content DEV-33670  		if (mDetail > SCULPT_MIN_AREA_DETAIL)  		{ -			if (sculptGetSurfaceArea() < SCULPT_MIN_AREA) +			F32 area = sculptGetSurfaceArea(); + +			mSurfaceArea = area; + +			const F32 SCULPT_MAX_AREA = 384.f; + +			if (area < SCULPT_MIN_AREA || area > SCULPT_MAX_AREA)  			{  				data_is_empty = TRUE;  			} @@ -2281,6 +3190,16 @@ BOOL LLVolume::isFlat(S32 face)  } +bool LLVolumeParams::isSculpt() const +{ +	return mSculptID.notNull(); +} + +bool LLVolumeParams::isMeshSculpt() const +{ +	return isSculpt() && ((mSculptType & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH); +} +  bool LLVolumeParams::operator==(const LLVolumeParams ¶ms) const  {  	return ( (getPathParams() == params.getPathParams()) && @@ -2314,7 +3233,6 @@ bool LLVolumeParams::operator<(const LLVolumeParams ¶ms) const  		return mSculptID < params.mSculptID;  	} -  	return mSculptType < params.mSculptType; @@ -3315,6 +4233,23 @@ S32 *LLVolume::getTriangleIndices(U32 &num_indices) const  	return index;  } +void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) +{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the  +	//supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost +	F32 detail[] = {1.f, 1.5f, 2.5f, 4.f};	 +	for (S32 i = 0; i < 4; i++) +	{ +		S32 count = 0; +		S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); +		S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); + +		count = (profile_points-1)*2*(path_points-1); +		count += profile_points*2; + +		counts[i] = count; +	} +} +  S32 LLVolume::getNumTriangleIndices() const  {  	BOOL profile_open = getProfile().isOpen(); @@ -3372,34 +4307,72 @@ S32 LLVolume::getNumTriangleIndices() const  	return count;  } + +S32 LLVolume::getNumTriangles(S32* vcount) const +{ +	U32 triangle_count = 0; +	U32 vertex_count = 0; + +	for (S32 i = 0; i < getNumVolumeFaces(); ++i) +	{ +		const LLVolumeFace& face = getVolumeFace(i); +		triangle_count += face.mNumIndices/3; + +		vertex_count += face.mNumVertices; +	} + + +	if (vcount) +	{ +		*vcount = vertex_count; +	} +	 +	return triangle_count; +} + +  //-----------------------------------------------------------------------------  // generateSilhouetteVertices()  //-----------------------------------------------------------------------------  void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  										  std::vector<LLVector3> &normals, -										  std::vector<S32> &segments, -										  const LLVector3& 
obj_cam_vec, -										  const LLMatrix4& mat, -										  const LLMatrix3& norm_mat, +										  const LLVector3& obj_cam_vec_in, +										  const LLMatrix4& mat_in, +										  const LLMatrix3& norm_mat_in,  										  S32 face_mask)  {  	LLMemType m1(LLMemType::MTYPE_VOLUME); -	 + +	LLMatrix4a mat; +	mat.loadu(mat_in); + +	LLMatrix4a norm_mat; +	norm_mat.loadu(norm_mat_in); +		 +	LLVector4a obj_cam_vec; +	obj_cam_vec.load3(obj_cam_vec_in.mV); +  	vertices.clear();  	normals.clear(); -	segments.clear(); +	if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) +	{ +		return; +	} +	  	S32 cur_index = 0;  	//for each face  	for (face_list_t::iterator iter = mVolumeFaces.begin();  		 iter != mVolumeFaces.end(); ++iter)  	{ -		const LLVolumeFace& face = *iter; +		LLVolumeFace& face = *iter; -		if (!(face_mask & (0x1 << cur_index++))) +		if (!(face_mask & (0x1 << cur_index++)) || +		     face.mNumIndices == 0 || face.mEdge.empty())  		{  			continue;  		} +  		if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) {  		} @@ -3412,7 +4385,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  #if DEBUG_SILHOUETTE_EDGE_MAP  			//for each triangle -			U32 count = face.mIndices.size(); +			U32 count = face.mNumIndices;  			for (U32 j = 0; j < count/3; j++) {  				//get vertices  				S32 v1 = face.mIndices[j*3+0]; @@ -3420,9 +4393,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  				S32 v3 = face.mIndices[j*3+2];  				//get current face center -				LLVector3 cCenter = (face.mVertices[v1].mPosition +  -									face.mVertices[v2].mPosition +  -									face.mVertices[v3].mPosition) / 3.0f; +				LLVector3 cCenter = (face.mVertices[v1].getPosition() +  +									face.mVertices[v2].getPosition() +  +									face.mVertices[v3].getPosition()) / 3.0f;  				//for each edge  				for (S32 k = 0; k < 3; k++) { @@ -3440,9 +4413,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  					v3 = face.mIndices[nIndex*3+2];  					//get neighbor face center -					LLVector3 nCenter = (face.mVertices[v1].mPosition +  -									face.mVertices[v2].mPosition +  -									face.mVertices[v3].mPosition) / 3.0f; +					LLVector3 nCenter = (face.mVertices[v1].getPosition() +  +									face.mVertices[v2].getPosition() +  +									face.mVertices[v3].getPosition()) / 3.0f;  					//draw line  					vertices.push_back(cCenter); @@ -3465,15 +4438,15 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  #elif DEBUG_SILHOUETTE_NORMALS  			//for each vertex -			for (U32 j = 0; j < face.mVertices.size(); j++) { -				vertices.push_back(face.mVertices[j].mPosition); -				vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mNormal*0.1f); +			for (U32 j = 0; j < face.mNumVertices; j++) { +				vertices.push_back(face.mVertices[j].getPosition()); +				vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].getNormal()*0.1f);  				normals.push_back(LLVector3(0,0,1));  				normals.push_back(LLVector3(0,0,1));  				segments.push_back(vertices.size());  #if DEBUG_SILHOUETTE_BINORMALS -				vertices.push_back(face.mVertices[j].mPosition); -				vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mBinormal*0.1f); +				vertices.push_back(face.mVertices[j].getPosition()); +				vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f);  				normals.push_back(LLVector3(0,0,1));  				normals.push_back(LLVector3(0,0,1));  				
segments.push_back(vertices.size()); @@ -3491,26 +4464,36 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  			//for each triangle  			std::vector<U8> fFacing; -			vector_append(fFacing, face.mIndices.size()/3); -			for (U32 j = 0; j < face.mIndices.size()/3; j++)  +			vector_append(fFacing, face.mNumIndices/3); + +			LLVector4a* v = (LLVector4a*) face.mPositions; +			LLVector4a* n = (LLVector4a*) face.mNormals; + +			for (U32 j = 0; j < face.mNumIndices/3; j++)   			{  				//approximate normal  				S32 v1 = face.mIndices[j*3+0];  				S32 v2 = face.mIndices[j*3+1];  				S32 v3 = face.mIndices[j*3+2]; -				LLVector3 norm = (face.mVertices[v1].mPosition - face.mVertices[v2].mPosition) %  -					(face.mVertices[v2].mPosition - face.mVertices[v3].mPosition); -				 -				if (norm.magVecSquared() < 0.00000001f)  +				LLVector4a c1,c2; +				c1.setSub(v[v1], v[v2]); +				c2.setSub(v[v2], v[v3]); + +				LLVector4a norm; + +				norm.setCross3(c1, c2); + +				if (norm.dot3(norm) < 0.00000001f)   				{  					fFacing[j] = AWAY | TOWARDS;  				}  				else   				{  					//get view vector -					LLVector3 view = (obj_cam_vec-face.mVertices[v1].mPosition); -					bool away = view * norm > 0.0f;  +					LLVector4a view; +					view.setSub(obj_cam_vec, v[v1]); +					bool away = view.dot3(norm) > 0.0f;   					if (away)   					{  						fFacing[j] = AWAY; @@ -3523,7 +4506,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  			}  			//for each triangle -			for (U32 j = 0; j < face.mIndices.size()/3; j++)  +			for (U32 j = 0; j < face.mNumIndices/3; j++)   			{  				if (fFacing[j] == (AWAY | TOWARDS))   				{ //this is a degenerate triangle @@ -3556,17 +4539,21 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  						S32 v1 = face.mIndices[j*3+k];  						S32 v2 = face.mIndices[j*3+((k+1)%3)]; -						vertices.push_back(face.mVertices[v1].mPosition*mat); -						LLVector3 norm1 = face.mVertices[v1].mNormal * norm_mat; -						norm1.normVec(); -						normals.push_back(norm1); +						LLVector4a t; +						mat.affineTransform(v[v1], t); +						vertices.push_back(LLVector3(t[0], t[1], t[2])); + +						norm_mat.rotate(n[v1], t); -						vertices.push_back(face.mVertices[v2].mPosition*mat); -						LLVector3 norm2 = face.mVertices[v2].mNormal * norm_mat; -						norm2.normVec(); -						normals.push_back(norm2); +						t.normalize3fast(); +						normals.push_back(LLVector3(t[0], t[1], t[2])); -						segments.push_back(vertices.size()); +						mat.affineTransform(v[v2], t); +						vertices.push_back(LLVector3(t[0], t[1], t[2])); +						 +						norm_mat.rotate(n[v2], t); +						t.normalize3fast(); +						normals.push_back(LLVector3(t[0], t[1], t[2]));  					}  				}		  			} @@ -3579,6 +4566,19 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,  								   S32 face,  								   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal)  { +	LLVector4a starta, enda; +	starta.load3(start.mV); +	enda.load3(end.mV); + +	return lineSegmentIntersect(starta, enda, face, intersection, tex_coord, normal, bi_normal); + +} + + +S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end,  +								   S32 face, +								   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) +{  	S32 hit_face = -1;  	S32 start_face; @@ -3595,16 +4595,23 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,  		end_face = face;  	} -	
LLVector3 dir = end - start; +	LLVector4a dir; +	dir.setSub(end, start);  	F32 closest_t = 2.f; // must be larger than 1 +	end_face = llmin(end_face, getNumVolumeFaces()-1); +  	for (S32 i = start_face; i <= end_face; i++)  	{ -		const LLVolumeFace &face = getVolumeFace((U32)i); +		LLVolumeFace &face = mVolumeFaces[i]; + +		LLVector4a box_center; +		box_center.setAdd(face.mExtents[0], face.mExtents[1]); +		box_center.mul(0.5f); -		LLVector3 box_center = (face.mExtents[0] + face.mExtents[1]) / 2.f; -		LLVector3 box_size   = face.mExtents[1] - face.mExtents[0]; +		LLVector4a box_size; +		box_size.setSub(face.mExtents[1], face.mExtents[0]);          if (LLLineSegmentBoxIntersect(start, end, box_center, box_size))  		{ @@ -3612,57 +4619,85 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,  			{  				genBinormals(i);  			} -			 -			for (U32 tri = 0; tri < face.mIndices.size()/3; tri++)  -			{ -				S32 index1 = face.mIndices[tri*3+0]; -				S32 index2 = face.mIndices[tri*3+1]; -				S32 index3 = face.mIndices[tri*3+2]; - -				F32 a, b, t; -			 -				if (LLTriangleRayIntersect(face.mVertices[index1].mPosition, -										   face.mVertices[index2].mPosition, -										   face.mVertices[index3].mPosition, -										   start, dir, &a, &b, &t, FALSE)) -				{ -					if ((t >= 0.f) &&      // if hit is after start -						(t <= 1.f) &&      // and before end -						(t < closest_t))   // and this hit is closer -		{ -						closest_t = t; -						hit_face = i; - -						if (intersection != NULL) -						{ -							*intersection = start + dir * closest_t; -						} -			 -						if (tex_coord != NULL) -			{ -							*tex_coord = ((1.f - a - b)  * face.mVertices[index1].mTexCoord + -										  a              * face.mVertices[index2].mTexCoord + -										  b              * face.mVertices[index3].mTexCoord); -						} +			if (isUnique()) +			{ //don't bother with an octree for flexi volumes +				U32 tri_count = face.mNumIndices/3; -						if (normal != NULL) +				for (U32 j = 0; j < tri_count; ++j)  				{ -							*normal    = ((1.f - a - b)  * face.mVertices[index1].mNormal +  -										  a              * face.mVertices[index2].mNormal + -										  b              * face.mVertices[index3].mNormal); -						} +					U16 idx0 = face.mIndices[j*3+0]; +					U16 idx1 = face.mIndices[j*3+1]; +					U16 idx2 = face.mIndices[j*3+2]; + +					const LLVector4a& v0 = face.mPositions[idx0]; +					const LLVector4a& v1 = face.mPositions[idx1]; +					const LLVector4a& v2 = face.mPositions[idx2]; +				 +					F32 a,b,t; -						if (bi_normal != NULL) +					if (LLTriangleRayIntersect(v0, v1, v2, +							start, dir, a, b, t))  					{ -							*bi_normal = ((1.f - a - b)  * face.mVertices[index1].mBinormal +  -										  a              * face.mVertices[index2].mBinormal + -										  b              * face.mVertices[index3].mBinormal); +						if ((t >= 0.f) &&      // if hit is after start +							(t <= 1.f) &&      // and before end +							(t < closest_t))   // and this hit is closer +						{ +							closest_t = t; +							hit_face = i; + +							if (intersection != NULL) +							{ +								LLVector4a intersect = dir; +								intersect.mul(closest_t); +								intersect.add(start); +								intersection->set(intersect.getF32ptr()); +							} + + +							if (tex_coord != NULL) +							{ +								LLVector2* tc = (LLVector2*) face.mTexCoords; +								*tex_coord = ((1.f - a - b)  * tc[idx0] + +									a              * tc[idx1] + +									b              * tc[idx2]); + +							} + +							if (normal!= NULL) 
+							{ +								LLVector4* norm = (LLVector4*) face.mNormals; + +								*normal		= ((1.f - a - b)  * LLVector3(norm[idx0]) +  +									a              * LLVector3(norm[idx1]) + +									b              * LLVector3(norm[idx2])); +							} + +							if (bi_normal != NULL) +							{ +								LLVector4* binormal = (LLVector4*) face.mBinormals; +								*bi_normal = ((1.f - a - b)  * LLVector3(binormal[idx0]) +  +										a              * LLVector3(binormal[idx1]) + +										b              * LLVector3(binormal[idx2])); +							}  						} -  					}  				}  			} +			else +			{ +				if (!face.mOctree) +				{ +					face.createOctree(); +				} +			 +				LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal); +				intersect.traverse(face.mOctree); +				if (intersect.mHitFace) +				{ +					hit_face = i; +				} +			}  		}		  	} @@ -4109,11 +5144,28 @@ BOOL LLVolumeParams::exportLegacyStream(std::ostream& output_stream) const  	return TRUE;  } +LLSD LLVolumeParams::sculptAsLLSD() const +{ +	LLSD sd = LLSD(); +	sd["id"] = getSculptID(); +	sd["type"] = getSculptType(); + +	return sd; +} + +bool LLVolumeParams::sculptFromLLSD(LLSD& sd) +{ +	setSculptID(sd["id"].asUUID(), (U8)sd["type"].asInteger()); +	return true; +} +  LLSD LLVolumeParams::asLLSD() const  {  	LLSD sd = LLSD();  	sd["path"] = mPathParams;  	sd["profile"] = mProfileParams; +	sd["sculpt"] = sculptAsLLSD(); +	  	return sd;  } @@ -4121,6 +5173,8 @@ bool LLVolumeParams::fromLLSD(LLSD& sd)  {  	mPathParams.fromLLSD(sd["path"]);  	mProfileParams.fromLLSD(sd["profile"]); +	sculptFromLLSD(sd["sculpt"]); +		  	return true;  } @@ -4163,6 +5217,12 @@ const F32 MIN_CONCAVE_PATH_WEDGE = 0.111111f;	// 1/9 unity  // for collison purposes  BOOL LLVolumeParams::isConvex() const  { +	if (!getSculptID().isNull()) +	{ +		// can't determine, be safe and say no: +		return FALSE; +	} +	  	F32 path_length = mPathParams.getEnd() - mPathParams.getBegin();  	F32 hollow = mProfileParams.getHollow(); @@ -4403,22 +5463,866 @@ std::ostream& operator<<(std::ostream &s, const LLVolume *volumep)  	return s;  } +LLVolumeFace::LLVolumeFace() :  +	mID(0), +	mTypeMask(0), +	mBeginS(0), +	mBeginT(0), +	mNumS(0), +	mNumT(0), +	mNumVertices(0), +	mNumIndices(0), +	mPositions(NULL), +	mNormals(NULL), +	mBinormals(NULL), +	mTexCoords(NULL), +	mIndices(NULL), +	mWeights(NULL), +	mOctree(NULL) +{ +	mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); +	mExtents[0].splat(-0.5f); +	mExtents[1].splat(0.5f); +	mCenter = mExtents+2; +} + +LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) +:	mID(0), +	mTypeMask(0), +	mBeginS(0), +	mBeginT(0), +	mNumS(0), +	mNumT(0), +	mNumVertices(0), +	mNumIndices(0), +	mPositions(NULL), +	mNormals(NULL), +	mBinormals(NULL), +	mTexCoords(NULL), +	mIndices(NULL), +	mWeights(NULL), +	mOctree(NULL) +{  +	mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); +	mCenter = mExtents+2; +	*this = src; +} + +LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) +{ +	if (&src == this) +	{ //self assignment, do nothing +		return *this; +	} + +	mID = src.mID; +	mTypeMask = src.mTypeMask; +	mBeginS = src.mBeginS; +	mBeginT = src.mBeginT; +	mNumS = src.mNumS; +	mNumT = src.mNumT; + +	mExtents[0] = src.mExtents[0]; +	mExtents[1] = src.mExtents[1]; +	*mCenter = *src.mCenter; + +	mNumVertices = 0; +	mNumIndices = 0; + +	freeData(); +	 +	LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a)); + +	resizeVertices(src.mNumVertices); +	
resizeIndices(src.mNumIndices); + +	if (mNumVertices) +	{ +		S32 vert_size = mNumVertices*sizeof(LLVector4a); +		S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; +			 +		LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size); +		LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + +		if(src.mTexCoords) +		{ +			LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); +		} +		else +		{ +			ll_aligned_free_16(mTexCoords) ; +			mTexCoords = NULL ; +		} + + +		if (src.mBinormals) +		{ +			allocateBinormals(src.mNumVertices); +			LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) src.mBinormals, vert_size); +		} +		else +		{ +			ll_aligned_free_16(mBinormals); +			mBinormals = NULL; +		} + +		if (src.mWeights) +		{ +			allocateWeights(src.mNumVertices); +			LLVector4a::memcpyNonAliased16((F32*) mWeights, (F32*) src.mWeights, vert_size); +		} +		else +		{ +			ll_aligned_free_16(mWeights); +			mWeights = NULL; +		} +	} + +	if (mNumIndices) +	{ +		S32 idx_size = (mNumIndices*sizeof(U16)+0xF) & ~0xF; +		 +		LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size); +	} +	 +	//delete  +	return *this; +} + +LLVolumeFace::~LLVolumeFace() +{ +	ll_aligned_free_16(mExtents); +	mExtents = NULL; + +	freeData(); +} + +void LLVolumeFace::freeData() +{ +	ll_aligned_free_16(mPositions); +	mPositions = NULL; +	ll_aligned_free_16( mNormals); +	mNormals = NULL; +	ll_aligned_free_16(mTexCoords); +	mTexCoords = NULL; +	ll_aligned_free_16(mIndices); +	mIndices = NULL; +	ll_aligned_free_16(mBinormals); +	mBinormals = NULL; +	ll_aligned_free_16(mWeights); +	mWeights = NULL; + +	delete mOctree; +	mOctree = NULL; +}  BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)  { +	//tree for this face is no longer valid +	delete mOctree; +	mOctree = NULL; + +	BOOL ret = FALSE ;  	if (mTypeMask & CAP_MASK)  	{ -		return createCap(volume, partial_build); +		ret = createCap(volume, partial_build);  	}  	else if ((mTypeMask & END_MASK) || (mTypeMask & SIDE_MASK))  	{ -		return createSide(volume, partial_build); +		ret = createSide(volume, partial_build);  	}  	else  	{  		llerrs << "Unknown/uninitialized face type!" 
<< llendl; -		return FALSE;  	} + +	//update the range of the texture coordinates +	if(ret) +	{ +		mTexCoordExtents[0].setVec(1.f, 1.f) ; +		mTexCoordExtents[1].setVec(0.f, 0.f) ; + +		for(U32 i = 0 ; i < mNumVertices ; i++) +		{ +			if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0]) +			{ +				mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ; +			} +			if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0]) +			{ +				mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ; +			} + +			if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1]) +			{ +				mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ; +			} +			if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1]) +			{ +				mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ; +			}			 +		} +		mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ; +		mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ; +		mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ; +		mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ; +	} + +	return ret ; +} + +void LLVolumeFace::getVertexData(U16 index, LLVolumeFace::VertexData& cv) +{ +	cv.setPosition(mPositions[index]); +	if (mNormals) +	{ +		cv.setNormal(mNormals[index]); +	} +	else +	{ +		cv.getNormal().clear(); +	} + +	if (mTexCoords) +	{ +		cv.mTexCoord = mTexCoords[index]; +	} +	else +	{ +		cv.mTexCoord.clear(); +	} +} + +bool LLVolumeFace::VertexMapData::operator==(const LLVolumeFace::VertexData& rhs) const +{ +	return getPosition().equals3(rhs.getPosition()) && +		mTexCoord == rhs.mTexCoord && +		getNormal().equals3(rhs.getNormal()); +} + +bool LLVolumeFace::VertexMapData::ComparePosition::operator()(const LLVector3& a, const LLVector3& b) const +{ +	if (a.mV[0] != b.mV[0]) +	{ +		return a.mV[0] < b.mV[0]; +	} +	 +	if (a.mV[1] != b.mV[1]) +	{ +		return a.mV[1] < b.mV[1]; +	} +	 +	return a.mV[2] < b.mV[2]; +} + +void LLVolumeFace::optimize(F32 angle_cutoff) +{ +	LLVolumeFace new_face; + +	//map of points to vector of vertices at that point +	std::map<U64, std::vector<VertexMapData> > point_map; + +	LLVector4a range; +	range.setSub(mExtents[1],mExtents[0]); + +	//remove redundant vertices +	for (U32 i = 0; i < mNumIndices; ++i) +	{ +		U16 index = mIndices[i]; + +		LLVolumeFace::VertexData cv; +		getVertexData(index, cv); +		 +		BOOL found = FALSE; + +		LLVector4a pos; +		pos.setSub(mPositions[index], mExtents[0]); +		pos.div(range); + +		U64 pos64 = 0; + +		pos64 = (U16) (pos[0]*65535); +		pos64 = pos64 | (((U64) (pos[1]*65535)) << 16); +		pos64 = pos64 | (((U64) (pos[2]*65535)) << 32); + +		std::map<U64, std::vector<VertexMapData> >::iterator point_iter = point_map.find(pos64); +		 +		if (point_iter != point_map.end()) +		{ //duplicate point might exist +			for (U32 j = 0; j < point_iter->second.size(); ++j) +			{ +				LLVolumeFace::VertexData& tv = (point_iter->second)[j]; +				if (tv.compareNormal(cv, angle_cutoff)) +				{ +					found = TRUE; +					new_face.pushIndex((point_iter->second)[j].mIndex); +					break; +				} +			} +		} + +		if (!found) +		{ +			new_face.pushVertex(cv); +			U16 index = (U16) new_face.mNumVertices-1; +			new_face.pushIndex(index); + +			VertexMapData d; +			d.setPosition(cv.getPosition()); +			d.mTexCoord = cv.mTexCoord; +			d.setNormal(cv.getNormal()); +			d.mIndex = index; +			if (point_iter != point_map.end()) +			{ +				point_iter->second.push_back(d); +			} +			else +			{ +				point_map[pos64].push_back(d); +			} +		} +	} + +	llassert(new_face.mNumIndices == mNumIndices); +	llassert(new_face.mNumVertices <= mNumVertices); + +	if 
(angle_cutoff > 1.f && !mNormals) +	{ +		ll_aligned_free_16(new_face.mNormals); +		new_face.mNormals = NULL; +	} + +	if (!mTexCoords) +	{ +		ll_aligned_free_16(new_face.mTexCoords); +		new_face.mTexCoords = NULL; +	} + +	swapData(new_face); +} + +class LLVCacheTriangleData; + +class LLVCacheVertexData +{ +public: +	S32 mIdx; +	S32 mCacheTag; +	F32 mScore; +	U32 mActiveTriangles; +	std::vector<LLVCacheTriangleData*> mTriangles; + +	LLVCacheVertexData() +	{ +		mCacheTag = -1; +		mScore = 0.f; +		mActiveTriangles = 0; +		mIdx = -1; +	} +}; + +class LLVCacheTriangleData +{ +public: +	bool mActive; +	F32 mScore; +	LLVCacheVertexData* mVertex[3]; + +	LLVCacheTriangleData() +	{ +		mActive = true; +		mScore = 0.f; +		mVertex[0] = mVertex[1] = mVertex[2] = NULL; +	} + +	void complete() +	{ +		mActive = false; +		for (S32 i = 0; i < 3; ++i) +		{ +			if (mVertex[i]) +			{ +				llassert_always(mVertex[i]->mActiveTriangles > 0); +				mVertex[i]->mActiveTriangles--; +			} +		} +	} + +	bool operator<(const LLVCacheTriangleData& rhs) const +	{ //highest score first +		return rhs.mScore < mScore; +	} +}; + +const F32 FindVertexScore_CacheDecayPower = 1.5f; +const F32 FindVertexScore_LastTriScore = 0.75f; +const F32 FindVertexScore_ValenceBoostScale = 2.0f; +const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const U32 MaxSizeVertexCache = 32; + +F32 find_vertex_score(LLVCacheVertexData& data) +{ +	if (data.mActiveTriangles == 0) +	{ //no triangle references this vertex +		return -1.f; +	} + +	F32 score = 0.f; + +	S32 cache_idx = data.mCacheTag; + +	if (cache_idx < 0) +	{ +		//not in cache +	} +	else +	{ +		if (cache_idx < 3) +		{ //vertex was in the last triangle +			score = FindVertexScore_LastTriScore; +		} +		else +		{ //more points for being higher in the cache +			F32 scaler = 1.f/(MaxSizeVertexCache-3); +			score = 1.f-((cache_idx-3)*scaler); +			score = powf(score, FindVertexScore_CacheDecayPower); +		} +	} + +	//bonus points for having low valence +	F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); +	score += FindVertexScore_ValenceBoostScale * valence_boost; + +	return score; +} + +class LLVCacheFIFO +{ +public: +	LLVCacheVertexData* mCache[MaxSizeVertexCache]; +	U32 mMisses; + +	LLVCacheFIFO() +	{ +		mMisses = 0; +		for (U32 i = 0; i < MaxSizeVertexCache; ++i) +		{ +			mCache[i] = NULL; +		} +	} + +	void addVertex(LLVCacheVertexData* data) +	{ +		if (data->mCacheTag == -1) +		{ +			mMisses++; + +			S32 end = MaxSizeVertexCache-1; + +			if (mCache[end]) +			{ +				mCache[end]->mCacheTag = -1; +			} + +			for (S32 i = end; i > 0; --i) +			{ +				mCache[i] = mCache[i-1]; +				if (mCache[i]) +				{ +					mCache[i]->mCacheTag = i; +				} +			} + +			mCache[0] = data; +			data->mCacheTag = 0; +		} +	} +}; + +class LLVCacheLRU +{ +public: +	LLVCacheVertexData* mCache[MaxSizeVertexCache+3]; + +	LLVCacheTriangleData* mBestTriangle; +	 +	U32 mMisses; + +	LLVCacheLRU() +	{ +		for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) +		{ +			mCache[i] = NULL; +		} + +		mBestTriangle = NULL; +		mMisses = 0; +	} + +	void addVertex(LLVCacheVertexData* data) +	{ +		S32 end = MaxSizeVertexCache+2; +		if (data->mCacheTag != -1) +		{ //just moving a vertex to the front of the cache +			end = data->mCacheTag; +		} +		else +		{ +			mMisses++; +			if (mCache[end]) +			{ //adding a new vertex, vertex at end of cache falls off +				mCache[end]->mCacheTag = -1; +			} +		} + +		for (S32 i = end; i > 0; --i) +		{ //adjust cache pointers and tags +			mCache[i] = mCache[i-1]; + +			if 
(mCache[i]) +			{ +				mCache[i]->mCacheTag = i;			 +			} +		} + +		mCache[0] = data; +		mCache[0]->mCacheTag = 0; +	} + +	void addTriangle(LLVCacheTriangleData* data) +	{ +		addVertex(data->mVertex[0]); +		addVertex(data->mVertex[1]); +		addVertex(data->mVertex[2]); +	} + +	void updateScores() +	{ +		for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) +		{ //trailing 3 vertices aren't actually in the cache for scoring purposes +			if (mCache[i]) +			{ +				mCache[i]->mCacheTag = -1; +			} +		} + +		for (U32 i = 0; i < MaxSizeVertexCache; ++i) +		{ //update scores of vertices in cache +			if (mCache[i]) +			{ +				mCache[i]->mScore = find_vertex_score(*(mCache[i])); +				llassert_always(mCache[i]->mCacheTag == i); +			} +		} + +		mBestTriangle = NULL; +		//update triangle scores +		for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) +		{ +			if (mCache[i]) +			{ +				for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) +				{ +					LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; +					if (tri->mActive) +					{ +						tri->mScore = tri->mVertex[0]->mScore; +						tri->mScore += tri->mVertex[1]->mScore; +						tri->mScore += tri->mVertex[2]->mScore; + +						if (!mBestTriangle || mBestTriangle->mScore < tri->mScore) +						{ +							mBestTriangle = tri; +						} +					} +				} +			} +		} + +		//knock trailing 3 vertices off the cache +		for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) +		{ +			if (mCache[i]) +			{ +				llassert_always(mCache[i]->mCacheTag == -1); +				mCache[i] = NULL; +			} +		} +	} +}; + + +void LLVolumeFace::cacheOptimize() +{ //optimize for vertex cache according to Forsyth method:  +  // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html +	 +	LLVCacheLRU cache; +	 +	if (mNumVertices < 3) +	{ //nothing to do +		return; +	} + +	//mapping of vertices to triangles and indices +	std::vector<LLVCacheVertexData> vertex_data; + +	//mapping of triangles do vertices +	std::vector<LLVCacheTriangleData> triangle_data; + +	triangle_data.resize(mNumIndices/3); +	vertex_data.resize(mNumVertices); + +	for (U32 i = 0; i < mNumIndices; i++) +	{ //populate vertex data and triangle data arrays +		U16 idx = mIndices[i]; +		U32 tri_idx = i/3; + +		vertex_data[idx].mTriangles.push_back(&(triangle_data[tri_idx])); +		vertex_data[idx].mIdx = idx; +		triangle_data[tri_idx].mVertex[i%3] = &(vertex_data[idx]); +	} + +	/*F32 pre_acmr = 1.f; +	//measure cache misses from before rebuild +	{ +		LLVCacheFIFO test_cache; +		for (U32 i = 0; i < mNumIndices; ++i) +		{ +			test_cache.addVertex(&vertex_data[mIndices[i]]); +		} + +		for (U32 i = 0; i < mNumVertices; i++) +		{ +			vertex_data[i].mCacheTag = -1; +		} + +		pre_acmr = (F32) test_cache.mMisses/(mNumIndices/3); +	}*/ + +	for (U32 i = 0; i < mNumVertices; i++) +	{ //initialize score values (no cache -- might try a fifo cache here) +		vertex_data[i].mScore = find_vertex_score(vertex_data[i]); +		vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + +		for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) +		{ +			vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; +		} +	} + +	//sort triangle data by score +	std::sort(triangle_data.begin(), triangle_data.end()); + +	std::vector<U16> new_indices; + +	LLVCacheTriangleData* tri; + +	//prime pump by adding first triangle to cache; +	tri = &(triangle_data[0]); +	cache.addTriangle(tri); +	new_indices.push_back(tri->mVertex[0]->mIdx); +	new_indices.push_back(tri->mVertex[1]->mIdx); +	
new_indices.push_back(tri->mVertex[2]->mIdx); +	tri->complete(); + +	U32 breaks = 0; +	for (U32 i = 1; i < mNumIndices/3; ++i) +	{ +		cache.updateScores(); +		tri = cache.mBestTriangle; +		if (!tri) +		{ +			breaks++; +			for (U32 j = 0; j < triangle_data.size(); ++j) +			{ +				if (triangle_data[j].mActive) +				{ +					tri = &(triangle_data[j]); +					break; +				} +			} +		}	 +		 +		cache.addTriangle(tri); +		new_indices.push_back(tri->mVertex[0]->mIdx); +		new_indices.push_back(tri->mVertex[1]->mIdx); +		new_indices.push_back(tri->mVertex[2]->mIdx); +		tri->complete(); +	} + +	for (U32 i = 0; i < mNumIndices; ++i) +	{ +		mIndices[i] = new_indices[i]; +	} + +	/*F32 post_acmr = 1.f; +	//measure cache misses from after rebuild +	{ +		LLVCacheFIFO test_cache; +		for (U32 i = 0; i < mNumVertices; i++) +		{ +			vertex_data[i].mCacheTag = -1; +		} + +		for (U32 i = 0; i < mNumIndices; ++i) +		{ +			test_cache.addVertex(&vertex_data[mIndices[i]]); +		} +		 +		post_acmr = (F32) test_cache.mMisses/(mNumIndices/3); +	}*/ + +	//optimize for pre-TnL cache +	 +	//allocate space for new buffer +	S32 num_verts = mNumVertices; +	LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +	LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +	S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; +	LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + +	LLVector4a* wght = NULL; +	if (mWeights) +	{ +		wght = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +	} + +	LLVector4a* binorm = NULL; +	if (mBinormals) +	{ +		binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +	} + +	//allocate mapping of old indices to new indices +	std::vector<S32> new_idx; +	new_idx.resize(mNumVertices, -1); + +	S32 cur_idx = 0; +	for (U32 i = 0; i < mNumIndices; ++i) +	{ +		U16 idx = mIndices[i]; +		if (new_idx[idx] == -1) +		{ //this vertex hasn't been added yet +			new_idx[idx] = cur_idx; + +			//copy vertex data +			pos[cur_idx] = mPositions[idx]; +			norm[cur_idx] = mNormals[idx]; +			tc[cur_idx] = mTexCoords[idx]; +			if (mWeights) +			{ +				wght[cur_idx] = mWeights[idx]; +			} +			if (mBinormals) +			{ +				binorm[cur_idx] = mBinormals[idx]; +			} + +			cur_idx++; +		} +	} + +	for (U32 i = 0; i < mNumIndices; ++i) +	{ +		mIndices[i] = new_idx[mIndices[i]]; +	} +	 +	ll_aligned_free_16(mPositions); +	ll_aligned_free_16(mNormals); +	ll_aligned_free_16(mTexCoords); +	ll_aligned_free_16(mWeights); +	ll_aligned_free_16(mBinormals); + +	mPositions = pos; +	mNormals = norm; +	mTexCoords = tc; +	mWeights = wght; +	mBinormals = binorm; + +	//std::string result = llformat("ACMR pre/post: %.3f/%.3f  --  %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks); +	//llinfos << result << llendl; + +} + +void LLVolumeFace::createOctree(F32 scaler, const LLVector4a& center, const LLVector4a& size) +{ +	if (mOctree) +	{ +		return; +	} + +	mOctree = new LLOctreeRoot<LLVolumeTriangle>(center, size, NULL); +	new LLVolumeOctreeListener(mOctree); + +	for (U32 i = 0; i < mNumIndices; i+= 3) +	{ //for each triangle +		LLPointer<LLVolumeTriangle> tri = new LLVolumeTriangle(); +				 +		const LLVector4a& v0 = mPositions[mIndices[i]]; +		const LLVector4a& v1 = mPositions[mIndices[i+1]]; +		const LLVector4a& v2 = mPositions[mIndices[i+2]]; + +		//store pointers to vertex data +		tri->mV[0] = &v0; +		tri->mV[1] = &v1; +		tri->mV[2] = &v2; + +		//store indices +		tri->mIndex[0] = mIndices[i]; +		tri->mIndex[1] = mIndices[i+1]; +		
tri->mIndex[2] = mIndices[i+2]; + +		//get minimum point +		LLVector4a min = v0; +		min.setMin(min, v1); +		min.setMin(min, v2); + +		//get maximum point +		LLVector4a max = v0; +		max.setMax(max, v1); +		max.setMax(max, v2); + +		//compute center +		LLVector4a center; +		center.setAdd(min, max); +		center.mul(0.5f); + +		tri->mPositionGroup = center; + +		//compute "radius" +		LLVector4a size; +		size.setSub(max,min); +		 +		tri->mRadius = size.getLength3().getF32() * scaler; +		 +		//insert +		mOctree->insert(tri); +	} + +	//remove unneeded octree layers +	while (!mOctree->balance())	{ } + +	//calculate AABB for each node +	LLVolumeOctreeRebound rebound(this); +	rebound.traverse(mOctree); + +	if (gDebugGL) +	{ +		LLVolumeOctreeValidate validate; +		validate.traverse(mOctree); +	} +} + + +void LLVolumeFace::swapData(LLVolumeFace& rhs) +{ +	llswap(rhs.mPositions, mPositions); +	llswap(rhs.mNormals, mNormals); +	llswap(rhs.mBinormals, mBinormals); +	llswap(rhs.mTexCoords, mTexCoords); +	llswap(rhs.mIndices,mIndices); +	llswap(rhs.mNumVertices, mNumVertices); +	llswap(rhs.mNumIndices, mNumIndices);  }  void	LerpPlanarVertex(LLVolumeFace::VertexData& v0, @@ -4428,10 +6332,21 @@ void	LerpPlanarVertex(LLVolumeFace::VertexData& v0,  				   F32	coef01,  				   F32	coef02)  { -	vout.mPosition = v0.mPosition + ((v1.mPosition-v0.mPosition)*coef01)+((v2.mPosition-v0.mPosition)*coef02); + +	LLVector4a lhs; +	lhs.setSub(v1.getPosition(), v0.getPosition()); +	lhs.mul(coef01); +	LLVector4a rhs; +	rhs.setSub(v2.getPosition(), v0.getPosition()); +	rhs.mul(coef02); + +	rhs.add(lhs); +	rhs.add(v0.getPosition()); + +	vout.setPosition(rhs); +		  	vout.mTexCoord = v0.mTexCoord + ((v1.mTexCoord-v0.mTexCoord)*coef01)+((v2.mTexCoord-v0.mTexCoord)*coef02); -	vout.mNormal = v0.mNormal; -	vout.mBinormal = v0.mBinormal; +	vout.setNormal(v0.getNormal());  }  BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) @@ -4451,84 +6366,113 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)  	num_vertices = (grid_size+1)*(grid_size+1);  	num_indices = quad_count * 4; -	LLVector3& min = mExtents[0]; -	LLVector3& max = mExtents[1]; +	LLVector4a& min = mExtents[0]; +	LLVector4a& max = mExtents[1];  	S32 offset = 0;  	if (mTypeMask & TOP_MASK) +	{  		offset = (max_t-1) * max_s; +	}  	else +	{  		offset = mBeginS; +	} -	VertexData	corners[4]; -	VertexData baseVert; -	for(int t = 0; t < 4; t++){ -		corners[t].mPosition = mesh[offset + (grid_size*t)].mPos; -		corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; -		corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; -	} -	baseVert.mNormal =  -		((corners[1].mPosition-corners[0].mPosition) %  -		(corners[2].mPosition-corners[1].mPosition)); -	baseVert.mNormal.normVec(); -	if(!(mTypeMask & TOP_MASK)){ -		baseVert.mNormal *= -1.0f; -	}else{ -		//Swap the UVs on the U(X) axis for top face -		LLVector2 swap; -		swap = corners[0].mTexCoord; -		corners[0].mTexCoord=corners[3].mTexCoord; -		corners[3].mTexCoord=swap; -		swap = corners[1].mTexCoord; -		corners[1].mTexCoord=corners[2].mTexCoord; -		corners[2].mTexCoord=swap; -	} -	baseVert.mBinormal = calc_binormal_from_triangle(  -		corners[0].mPosition, corners[0].mTexCoord, -		corners[1].mPosition, corners[1].mTexCoord, -		corners[2].mPosition, corners[2].mTexCoord); -	for(int t = 0; t < 4; t++){ -		corners[t].mBinormal = baseVert.mBinormal; -		corners[t].mNormal = baseVert.mNormal; -	} -	mHasBinormals = TRUE; - -	if (partial_build)  	{ -		mVertices.clear(); -	} +		
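
[Editorial sketch] The SIMD rewrite of LerpPlanarVertex() above computes the same planar interpolation as the scalar code it replaces: p = v0 + (v1 - v0)*coef01 + (v2 - v0)*coef02. A scalar reference version, with Vec3 as a stand-in type rather than a viewer class:

    struct Vec3 { float x, y, z; };

    inline Vec3 lerp_planar(const Vec3& v0, const Vec3& v1, const Vec3& v2,
                            float coef01, float coef02)
    {
        // p = v0 + (v1 - v0) * coef01 + (v2 - v0) * coef02
        Vec3 p;
        p.x = v0.x + (v1.x - v0.x) * coef01 + (v2.x - v0.x) * coef02;
        p.y = v0.y + (v1.y - v0.y) * coef01 + (v2.y - v0.y) * coef02;
        p.z = v0.z + (v1.z - v0.z) * coef01 + (v2.z - v0.z) * coef02;
        return p;
    }
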
VertexData	corners[4]; +		VertexData baseVert; +		for(S32 t = 0; t < 4; t++) +		{ +			corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); +			corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; +			corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; +		} -	S32	vtop = mVertices.size(); -	for(int gx = 0;gx<grid_size+1;gx++){ -		for(int gy = 0;gy<grid_size+1;gy++){ -			VertexData newVert; -			LerpPlanarVertex( -				corners[0], -				corners[1], -				corners[3], -				newVert, -				(F32)gx/(F32)grid_size, -				(F32)gy/(F32)grid_size); -			mVertices.push_back(newVert); +		{ +			LLVector4a lhs; +			lhs.setSub(corners[1].getPosition(), corners[0].getPosition()); +			LLVector4a rhs; +			rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); +			baseVert.getNormal().setCross3(lhs, rhs);  +			baseVert.getNormal().normalize3fast(); +		} -			if (gx == 0 && gy == 0) -			{ -				min = max = newVert.mPosition; -			} -			else +		if(!(mTypeMask & TOP_MASK)) +		{ +			baseVert.getNormal().mul(-1.0f); +		} +		else +		{ +			//Swap the UVs on the U(X) axis for top face +			LLVector2 swap; +			swap = corners[0].mTexCoord; +			corners[0].mTexCoord=corners[3].mTexCoord; +			corners[3].mTexCoord=swap; +			swap = corners[1].mTexCoord; +			corners[1].mTexCoord=corners[2].mTexCoord; +			corners[2].mTexCoord=swap; +		} + +		LLVector4a binormal; +		 +		calc_binormal_from_triangle( binormal, +			corners[0].getPosition(), corners[0].mTexCoord, +			corners[1].getPosition(), corners[1].mTexCoord, +			corners[2].getPosition(), corners[2].mTexCoord); +		 +		binormal.normalize3fast(); + +		S32 size = (grid_size+1)*(grid_size+1); +		resizeVertices(size); +		allocateBinormals(size); + +		LLVector4a* pos = (LLVector4a*) mPositions; +		LLVector4a* norm = (LLVector4a*) mNormals; +		LLVector4a* binorm = (LLVector4a*) mBinormals; +		LLVector2* tc = (LLVector2*) mTexCoords; + +		for(int gx = 0;gx<grid_size+1;gx++) +		{ +			for(int gy = 0;gy<grid_size+1;gy++)  			{ -				update_min_max(min,max,newVert.mPosition); +				VertexData newVert; +				LerpPlanarVertex( +					corners[0], +					corners[1], +					corners[3], +					newVert, +					(F32)gx/(F32)grid_size, +					(F32)gy/(F32)grid_size); + +				*pos++ = newVert.getPosition(); +				*norm++ = baseVert.getNormal(); +				*tc++ = newVert.mTexCoord; +				*binorm++ = binormal; + +				if (gx == 0 && gy == 0) +				{ +					min = newVert.getPosition(); +					max = min; +				} +				else +				{ +					min.setMin(min, newVert.getPosition()); +					max.setMax(max, newVert.getPosition()); +				}  			}  		} -	} -	mCenter = (min + max) * 0.5f; +		mCenter->setAdd(min, max); +		mCenter->mul(0.5f);  +	}  	if (!partial_build)  	{ -#if GEN_TRI_STRIP -		mTriStrip.clear(); -#endif +		resizeIndices(grid_size*grid_size*6); + +		U16* out = mIndices; +  		S32 idxs[] = {0,1,(grid_size+1)+1,(grid_size+1)+1,(grid_size+1),0};  		for(S32 gx = 0;gx<grid_size;gx++)  		{ @@ -4539,61 +6483,18 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)  				{  					for(S32 i=5;i>=0;i--)  					{ -						mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]); -					} -					 -#if GEN_TRI_STRIP -					if (gy == 0) -					{ -						mTriStrip.push_back((gx+1)*(grid_size+1)); -						mTriStrip.push_back((gx+1)*(grid_size+1)); -						mTriStrip.push_back(gx*(grid_size+1)); -					} - -					mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); -					mTriStrip.push_back(gy+1+gx*(grid_size+1)); -					 -					 -					if (gy == grid_size-1) -					{ -						
mTriStrip.push_back(gy+1+gx*(grid_size+1)); -					} -#endif +						*out++ = ((gy*(grid_size+1))+gx+idxs[i]); +					}		  				}  				else  				{  					for(S32 i=0;i<6;i++)  					{ -						mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]); -					} - -#if GEN_TRI_STRIP -					if (gy == 0) -					{ -						mTriStrip.push_back(gx*(grid_size+1)); -						mTriStrip.push_back(gx*(grid_size+1)); -						mTriStrip.push_back((gx+1)*(grid_size+1)); -					} - -					mTriStrip.push_back(gy+1+gx*(grid_size+1)); -					mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); -					 -					if (gy == grid_size-1) -					{ -						mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); +						*out++ = ((gy*(grid_size+1))+gx+idxs[i]);  					} -#endif  				} -			} -			 -		} - -#if GEN_TRI_STRIP -		if (mTriStrip.size()%2 == 1) -		{ -			mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); +			}	  		} -#endif  	}  	return TRUE; @@ -4623,17 +6524,31 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  	num_vertices = profile.size();  	num_indices = (profile.size() - 2)*3; -	mVertices.resize(num_vertices); +	if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) +	{ +		resizeVertices(num_vertices+1); +		allocateBinormals(num_vertices+1);	 -	if (!partial_build) +		if (!partial_build) +		{ +			resizeIndices(num_indices+3); +		} +	} +	else  	{ -		mIndices.resize(num_indices); +		resizeVertices(num_vertices); +		allocateBinormals(num_vertices); + +		if (!partial_build) +		{ +			resizeIndices(num_indices); +		}  	}  	S32 max_s = volume->getProfile().getTotal();  	S32 max_t = volume->getPath().mPath.size(); -	mCenter.clearVec(); +	mCenter->clear();  	S32 offset = 0;  	if (mTypeMask & TOP_MASK) @@ -4651,82 +6566,91 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  	LLVector2 cuv;  	LLVector2 min_uv, max_uv; -	LLVector3& min = mExtents[0]; -	LLVector3& max = mExtents[1]; +	LLVector4a& min = mExtents[0]; +	LLVector4a& max = mExtents[1]; + +	LLVector2* tc = (LLVector2*) mTexCoords; +	LLVector4a* pos = (LLVector4a*) mPositions; +	LLVector4a* norm = (LLVector4a*) mNormals; +	LLVector4a* binorm = (LLVector4a*) mBinormals;  	// Copy the vertices into the array  	for (S32 i = 0; i < num_vertices; i++)  	{  		if (mTypeMask & TOP_MASK)  		{ -			mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f; -			mVertices[i].mTexCoord.mV[1] = profile[i].mV[1]+0.5f; +			tc[i].mV[0] = profile[i].mV[0]+0.5f; +			tc[i].mV[1] = profile[i].mV[1]+0.5f;  		}  		else  		{  			// Mirror for underside. 
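
[Editorial sketch] The index generation in createUnCutCubeCap() above turns each quad of the (grid_size+1) x (grid_size+1) vertex grid into two triangles using the fixed offset table idxs[]. The same pattern in isolation, ignoring the winding flip applied for TOP_MASK faces (sketch only, not viewer code):

    #include <cstdint>
    #include <vector>

    // Two triangles per grid quad; offsets are relative to the quad's lower-left vertex.
    inline std::vector<uint16_t> grid_cap_indices(int grid_size)
    {
        const int stride = grid_size + 1;                       // vertices per row
        const int offs[6] = { 0, 1, stride + 1, stride + 1, stride, 0 };
        std::vector<uint16_t> out;
        out.reserve((size_t)grid_size * grid_size * 6);
        for (int gy = 0; gy < grid_size; ++gy)
        {
            for (int gx = 0; gx < grid_size; ++gx)
            {
                for (int i = 0; i < 6; ++i)
                {
                    out.push_back((uint16_t)(gy * stride + gx + offs[i]));
                }
            }
        }
        return out;
    }
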
-			mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f; -			mVertices[i].mTexCoord.mV[1] = 0.5f - profile[i].mV[1]; +			tc[i].mV[0] = profile[i].mV[0]+0.5f; +			tc[i].mV[1] = 0.5f - profile[i].mV[1];  		} -		mVertices[i].mPosition = mesh[i + offset].mPos; +		pos[i].load3(mesh[i + offset].mPos.mV);  		if (i == 0)  		{ -			min = max = mVertices[i].mPosition; -			min_uv = max_uv = mVertices[i].mTexCoord; +			max = pos[i]; +			min = max; +			min_uv = max_uv = tc[i];  		}  		else  		{ -			update_min_max(min,max, mVertices[i].mPosition); -			update_min_max(min_uv, max_uv, mVertices[i].mTexCoord); +			update_min_max(min,max,pos[i]); +			update_min_max(min_uv, max_uv, tc[i]);  		}  	} -	mCenter = (min+max)*0.5f; +	mCenter->setAdd(min, max); +	mCenter->mul(0.5f);  +  	cuv = (min_uv + max_uv)*0.5f; -	LLVector3 binormal = calc_binormal_from_triangle(  -		mCenter, cuv, -		mVertices[0].mPosition, mVertices[0].mTexCoord, -		mVertices[1].mPosition, mVertices[1].mTexCoord); -	binormal.normVec(); +	LLVector4a binormal; +	calc_binormal_from_triangle(binormal, +		*mCenter, cuv, +		pos[0], tc[0], +		pos[1], tc[1]); +	binormal.normalize3fast(); + +	LLVector4a normal; +	LLVector4a d0, d1; +	 -	LLVector3 d0; -	LLVector3 d1; -	LLVector3 normal; +	d0.setSub(*mCenter, pos[0]); +	d1.setSub(*mCenter, pos[1]); -	d0 = mCenter-mVertices[0].mPosition; -	d1 = mCenter-mVertices[1].mPosition; +	if (mTypeMask & TOP_MASK) +	{ +		normal.setCross3(d0, d1); +	} +	else +	{ +		normal.setCross3(d1, d0); +	} -	normal = (mTypeMask & TOP_MASK) ? (d0%d1) : (d1%d0); -	normal.normVec(); +	normal.normalize3fast();  	VertexData vd; -	vd.mPosition = mCenter; -	vd.mNormal = normal; -	vd.mBinormal = binormal; +	vd.setPosition(*mCenter);  	vd.mTexCoord = cuv;  	if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK))  	{ -		mVertices.push_back(vd); +		pos[num_vertices] = *mCenter; +		tc[num_vertices] = cuv;  		num_vertices++; -		if (!partial_build) -		{ -			vector_append(mIndices, 3); -		}  	} -	  	for (S32 i = 0; i < num_vertices; i++)  	{ -		mVertices[i].mBinormal = binormal; -		mVertices[i].mNormal = normal; +		binorm[i].load4a(binormal.getF32ptr()); +		norm[i].load4a(normal.getF32ptr());  	} -	mHasBinormals = TRUE; -  	if (partial_build)  	{  		return TRUE; @@ -4834,8 +6758,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  					pt2--;  				}  			} - -			makeTriStrip();  		}  		else  		{ @@ -4940,8 +6862,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  					pt2--;  				}  			} - -			makeTriStrip();  		}  	}  	else @@ -4963,131 +6883,277 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  			mIndices[3*i+v2] = i + 1;  		} -#if GEN_TRI_STRIP -		//make tri strip -		if (mTypeMask & OPEN_MASK) -		{ -			makeTriStrip(); -		} -		else -		{ -			S32 j = num_vertices-2; -			if (mTypeMask & TOP_MASK) -			{ -				mTriStrip.push_back(0); -				for (S32 i = 0; i <= j; ++i) -				{ -					mTriStrip.push_back(i); -					if (i != j) -					{ -						mTriStrip.push_back(j); -					} -					--j; -				} -			} -			else -			{ -				mTriStrip.push_back(j); -				for (S32 i = 0; i <= j; ++i) -				{ -					if (i != j) -					{ -						mTriStrip.push_back(j); -					} -					mTriStrip.push_back(i); -					--j; -				} -			} -			 -			mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -			if (mTriStrip.size()%2 == 1) -			{ -				mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -			} -		} -#endif  	}  	return TRUE;  } -void LLVolumeFace::makeTriStrip() +void LLVolumeFace::createBinormals()  { -#if GEN_TRI_STRIP -	
for (U32 i = 0; i < mIndices.size(); i+=3) +	LLMemType m1(LLMemType::MTYPE_VOLUME); +	 +	if (!mBinormals)  	{ -		U16 i0 = mIndices[i]; -		U16 i1 = mIndices[i+1]; -		U16 i2 = mIndices[i+2]; +		allocateBinormals(mNumVertices); -		if ((i/3)%2 == 1) -		{ -			mTriStrip.push_back(i0); -			mTriStrip.push_back(i0); -			mTriStrip.push_back(i1); -			mTriStrip.push_back(i2); -			mTriStrip.push_back(i2); -		} -		else +		//generate binormals +		LLVector4a* pos = mPositions; +		LLVector2* tc = (LLVector2*) mTexCoords; +		LLVector4a* binorm = (LLVector4a*) mBinormals; + +		LLVector4a* end = mBinormals+mNumVertices; +		while (binorm < end)  		{ -			mTriStrip.push_back(i2); -			mTriStrip.push_back(i2); -			mTriStrip.push_back(i1); -			mTriStrip.push_back(i0); -			mTriStrip.push_back(i0); +			(*binorm++).clear();  		} -	} -	if (mTriStrip.size()%2 == 1) -	{ -		mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -	} -#endif -} +		binorm = mBinormals; -void LLVolumeFace::createBinormals() -{ -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	 -	if (!mHasBinormals) -	{ -		//generate binormals -		for (U32 i = 0; i < mIndices.size()/3; i++)  +		for (U32 i = 0; i < mNumIndices/3; i++)   		{	//for each triangle -			const VertexData& v0 = mVertices[mIndices[i*3+0]]; -			const VertexData& v1 = mVertices[mIndices[i*3+1]]; -			const VertexData& v2 = mVertices[mIndices[i*3+2]]; +			const U16& i0 = mIndices[i*3+0]; +			const U16& i1 = mIndices[i*3+1]; +			const U16& i2 = mIndices[i*3+2];  			//calculate binormal -			LLVector3 binorm = calc_binormal_from_triangle(v0.mPosition, v0.mTexCoord, -															v1.mPosition, v1.mTexCoord, -															v2.mPosition, v2.mTexCoord); +			LLVector4a binormal; +			calc_binormal_from_triangle(binormal, +										pos[i0], tc[i0], +										pos[i1], tc[i1], +										pos[i2], tc[i2]); -			for (U32 j = 0; j < 3; j++)  -			{ //add triangle normal to vertices -				mVertices[mIndices[i*3+j]].mBinormal += binorm; // * (weight_sum - d[j])/weight_sum; -			} + +			//add triangle normal to vertices +			binorm[i0].add(binormal); +			binorm[i1].add(binormal); +			binorm[i2].add(binormal);  			//even out quad contributions  			if (i % 2 == 0)   			{ -				mVertices[mIndices[i*3+2]].mBinormal += binorm; +				binorm[i2].add(binormal);  			}  			else   			{ -				mVertices[mIndices[i*3+1]].mBinormal += binorm; +				binorm[i1].add(binormal);  			}  		}  		//normalize binormals -		for (U32 i = 0; i < mVertices.size(); i++)  +		for (U32 i = 0; i < mNumVertices; i++)   		{ -			mVertices[i].mBinormal.normVec(); -			mVertices[i].mNormal.normVec(); +			binorm[i].normalize3fast(); +			//bump map/planar projection code requires normals to be normalized +			mNormals[i].normalize3fast();  		} +	} +} + +void LLVolumeFace::resizeVertices(S32 num_verts) +{ +	ll_aligned_free_16(mPositions); +	ll_aligned_free_16(mNormals); +	ll_aligned_free_16(mBinormals); +	ll_aligned_free_16(mTexCoords); + +	mBinormals = NULL; + +	if (num_verts) +	{ +		mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +		assert_aligned(mPositions, 16); +		mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +		assert_aligned(mNormals, 16); + +		//pad texture coordinate block end to allow for QWORD reads +		S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; +		mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); +		assert_aligned(mTexCoords, 16); +	} +	else +	{ +		mPositions = NULL; +		mNormals = NULL; +		mTexCoords = NULL; +	} + +	mNumVertices = num_verts; +} + +void 
LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) +{ +	pushVertex(cv.getPosition(), cv.getNormal(), cv.mTexCoord); +} -		mHasBinormals = TRUE; +void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) +{ +	S32 new_verts = mNumVertices+1; +	S32 new_size = new_verts*16; +//	S32 old_size = mNumVertices*16; + +	//positions +	mPositions = (LLVector4a*) realloc(mPositions, new_size); +	 +	//normals +	mNormals = (LLVector4a*) realloc(mNormals, new_size); +	 +	//tex coords +	new_size = ((new_verts*8)+0xF) & ~0xF; +	mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); +	 + +	//just clear binormals +	ll_aligned_free_16(mBinormals); +	mBinormals = NULL; + +	mPositions[mNumVertices] = pos; +	mNormals[mNumVertices] = norm; +	mTexCoords[mNumVertices] = tc; + +	mNumVertices++;	 +} + +void LLVolumeFace::allocateBinormals(S32 num_verts) +{ +	ll_aligned_free_16(mBinormals); +	mBinormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +} + +void LLVolumeFace::allocateWeights(S32 num_verts) +{ +	ll_aligned_free_16(mWeights); +	mWeights = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +} + +void LLVolumeFace::resizeIndices(S32 num_indices) +{ +	ll_aligned_free_16(mIndices); +	 +	if (num_indices) +	{ +		//pad index block end to allow for QWORD reads +		S32 size = ((num_indices*sizeof(U16)) + 0xF) & ~0xF; +		 +		mIndices = (U16*) ll_aligned_malloc_16(size); +	} +	else +	{ +		mIndices = NULL; +	} + +	mNumIndices = num_indices; +} + +void LLVolumeFace::pushIndex(const U16& idx) +{ +	S32 new_count = mNumIndices + 1; +	S32 new_size = ((new_count*2)+0xF) & ~0xF; + +	S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; +	if (new_size != old_size) +	{ +		mIndices = (U16*) realloc(mIndices, new_size); +	} +	 +	mIndices[mNumIndices++] = idx; +} + +void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx) +{ +	resizeVertices(v.size()); +	resizeIndices(idx.size()); + +	for (U32 i = 0; i < v.size(); ++i) +	{ +		mPositions[i] = v[i].getPosition(); +		mNormals[i] = v[i].getNormal(); +		mTexCoords[i] = v[i].mTexCoord; +	} + +	for (U32 i = 0; i < idx.size(); ++i) +	{ +		mIndices[i] = idx[i]; +	} +} + +void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMatrix4& norm_mat_in) +{ +	U16 offset = mNumVertices; + +	S32 new_count = face.mNumVertices + mNumVertices; + +	if (new_count > 65536) +	{ +		llerrs << "Cannot append face -- 16-bit overflow will occur." << llendl; +	} +	 +	if (face.mNumVertices == 0) +	{ +		llerrs << "Cannot append empty face." 
<< llendl; +	} + +	//allocate new buffer space +	mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); +	assert_aligned(mPositions, 16); +	mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); +	assert_aligned(mNormals, 16); +	mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); +	assert_aligned(mTexCoords, 16); +	 +	mNumVertices = new_count; + +	//get destination address of appended face +	LLVector4a* dst_pos = mPositions+offset; +	LLVector2* dst_tc = mTexCoords+offset; +	LLVector4a* dst_norm = mNormals+offset; + +	//get source addresses of appended face +	const LLVector4a* src_pos = face.mPositions; +	const LLVector2* src_tc = face.mTexCoords; +	const LLVector4a* src_norm = face.mNormals; + +	//load aligned matrices +	LLMatrix4a mat, norm_mat; +	mat.loadu(mat_in); +	norm_mat.loadu(norm_mat_in); + +	for (U32 i = 0; i < face.mNumVertices; ++i) +	{ +		//transform appended face position and store +		mat.affineTransform(src_pos[i], dst_pos[i]); + +		//transform appended face normal and store +		norm_mat.rotate(src_norm[i], dst_norm[i]); +		dst_norm[i].normalize3fast(); + +		//copy appended face texture coordinate +		dst_tc[i] = src_tc[i]; + +		if (offset == 0 && i == 0) +		{ //initialize bounding box +			mExtents[0] = mExtents[1] = dst_pos[i]; +		} +		else +		{ +			//stretch bounding box +			update_min_max(mExtents[0], mExtents[1], dst_pos[i]); +		} +	} + + +	new_count = mNumIndices + face.mNumIndices; + +	//allocate new index buffer +	mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); +	 +	//get destination address into new index buffer +	U16* dst_idx = mIndices+mNumIndices; +	mNumIndices = new_count; + +	for (U32 i = 0; i < face.mNumIndices; ++i) +	{ //copy indices, offsetting by old vertex count +		dst_idx[i] = face.mIndices[i]+offset;  	}  } @@ -5117,18 +7183,20 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  	num_vertices = mNumS*mNumT;  	num_indices = (mNumS-1)*(mNumT-1)*6; -	mVertices.resize(num_vertices); -  	if (!partial_build)  	{ -		mIndices.resize(num_indices); -		mEdge.resize(num_indices); -	} -	else -	{ -		mHasBinormals = FALSE; +		resizeVertices(num_vertices); +		resizeIndices(num_indices); + +		if (!volume->isMeshAssetLoaded()) +		{ +			mEdge.resize(num_indices); +		}  	} +	LLVector4a* pos = (LLVector4a*) mPositions; +	LLVector4a* norm = (LLVector4a*) mNormals; +	LLVector2* tc = (LLVector2*) mTexCoords;  	S32 begin_stex = llfloor( profile[mBeginS].mV[2] );  	S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? 
mNumS/2 : mNumS; @@ -5179,21 +7247,20 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  				i = mBeginS + s + max_s*t;  			} -			mVertices[cur_vertex].mPosition = mesh[i].mPos; -			mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); +			pos[cur_vertex].load3(mesh[i].mPos.mV); +			tc[cur_vertex] = LLVector2(ss,tt); -			mVertices[cur_vertex].mNormal = LLVector3(0,0,0); -			mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); - +			norm[cur_vertex].clear();  			cur_vertex++;  			if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0)  			{ -				mVertices[cur_vertex].mPosition = mesh[i].mPos; -				mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); + +				pos[cur_vertex].load3(mesh[i].mPos.mV); +				tc[cur_vertex] = LLVector2(ss,tt); -				mVertices[cur_vertex].mNormal = LLVector3(0,0,0); -				mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); +				norm[cur_vertex].clear(); +				  				cur_vertex++;  			}  		} @@ -5211,29 +7278,29 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  			i = mBeginS + s + max_s*t;  			ss = profile[mBeginS + s].mV[2] - begin_stex; -			mVertices[cur_vertex].mPosition = mesh[i].mPos; -			mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); -		 -			mVertices[cur_vertex].mNormal = LLVector3(0,0,0); -			mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); - +			pos[cur_vertex].load3(mesh[i].mPos.mV); +			tc[cur_vertex] = LLVector2(ss,tt); +			norm[cur_vertex].clear();  +			  			cur_vertex++;  		}  	}  	//get bounding box for this side -	LLVector3& face_min = mExtents[0]; -	LLVector3& face_max = mExtents[1]; -	mCenter.clearVec(); +	LLVector4a& face_min = mExtents[0]; +	LLVector4a& face_max = mExtents[1]; +	mCenter->clear(); + +	face_min = face_max = pos[0]; -	face_min = face_max = mVertices[0].mPosition; -	for (U32 i = 1; i < mVertices.size(); ++i) +	for (U32 i = 1; i < mNumVertices; ++i)  	{ -		update_min_max(face_min, face_max, mVertices[i].mPosition); +		update_min_max(face_min, face_max, pos[i]);  	} -	mCenter = (face_min + face_max) * 0.5f; +	mCenter->setAdd(face_min, face_max); +	mCenter->mul(0.5f);  	S32 cur_index = 0;  	S32 cur_edge = 0; @@ -5241,18 +7308,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  	if (!partial_build)  	{ -#if GEN_TRI_STRIP -		mTriStrip.clear(); -#endif -  		// Now we generate the indices.  		
for (t = 0; t < (mNumT-1); t++)  		{ -#if GEN_TRI_STRIP -			//prepend terminating index to strip -			mTriStrip.push_back(mNumS*t); -#endif -  			for (s = 0; s < (mNumS-1); s++)  			{	  				mIndices[cur_index++] = s   + mNumS*t;			//bottom left @@ -5262,16 +7320,6 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  				mIndices[cur_index++] = s+1 + mNumS*t;			//bottom right  				mIndices[cur_index++] = s+1 + mNumS*(t+1);		//top right -#if GEN_TRI_STRIP -				if (s == 0) -				{ -					mTriStrip.push_back(s+mNumS*t); -					mTriStrip.push_back(s+mNumS*(t+1)); -				} -				mTriStrip.push_back(s+1+mNumS*t); -				mTriStrip.push_back(s+1+mNumS*(t+1)); -#endif -				  				mEdge[cur_edge++] = (mNumS-1)*2*t+s*2+1;						//bottom left/top right neighbor face   				if (t < mNumT-2) {												//top right/top left neighbor face   					mEdge[cur_edge++] = (mNumS-1)*2*(t+1)+s*2+1; @@ -5312,52 +7360,61 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  				}  				mEdge[cur_edge++] = (mNumS-1)*2*t+s*2;							//top right/bottom left neighbor face	  			} -#if GEN_TRI_STRIP -			//append terminating vertex to strip -			mTriStrip.push_back(mNumS-1+mNumS*(t+1)); -#endif  		} +	} -#if GEN_TRI_STRIP -		if (mTriStrip.size()%2 == 1) -		{ -			mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -		} -#endif +	//clear normals +	for (U32 i = 0; i < mNumVertices; i++) +	{ +		mNormals[i].clear();  	}  	//generate normals  -	for (U32 i = 0; i < mIndices.size()/3; i++) //for each triangle +	for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle  	{  		const U16* idx = &(mIndices[i*3]); -			 -		VertexData* v[] =  -		{	&mVertices[idx[0]], &mVertices[idx[1]], &mVertices[idx[2]] }; -					 -		//calculate triangle normal -		LLVector3 norm = (v[0]->mPosition-v[1]->mPosition) % (v[0]->mPosition-v[2]->mPosition); +		 -		v[0]->mNormal += norm; -		v[1]->mNormal += norm; -		v[2]->mNormal += norm; +		LLVector4a* v[] =  +		{	pos+idx[0], pos+idx[1], pos+idx[2] }; +		 +		LLVector4a* n[] =  +		{	norm+idx[0], norm+idx[1], norm+idx[2] }; +		 +		//calculate triangle normal +		LLVector4a a, b, c; +		 +		a.setSub(*v[0], *v[1]); +		b.setSub(*v[0], *v[2]); +		c.setCross3(a,b); +		n[0]->add(c); +		n[1]->add(c); +		n[2]->add(c); +		  		//even out quad contributions -		v[i%2+1]->mNormal += norm; +		n[i%2+1]->add(c);  	}  	// adjust normals based on wrapping and stitching -	BOOL s_bottom_converges = ((mVertices[0].mPosition - mVertices[mNumS*(mNumT-2)].mPosition).magVecSquared() < 0.000001f); -	BOOL s_top_converges = ((mVertices[mNumS-1].mPosition - mVertices[mNumS*(mNumT-2)+mNumS-1].mPosition).magVecSquared() < 0.000001f); +	LLVector4a top; +	top.setSub(pos[0], pos[mNumS*(mNumT-2)]); +	BOOL s_bottom_converges = (top.dot3(top) < 0.000001f); + +	top.setSub(pos[mNumS-1], pos[mNumS*(mNumT-2)+mNumS-1]); +	BOOL s_top_converges = (top.dot3(top) < 0.000001f); +  	if (sculpt_stitching == LL_SCULPT_TYPE_NONE)  // logic for non-sculpt volumes  	{  		if (volume->getPath().isOpen() == FALSE)  		{ //wrap normals on T  			for (S32 i = 0; i < mNumS; i++)  			{ -				LLVector3 norm = mVertices[i].mNormal + mVertices[mNumS*(mNumT-1)+i].mNormal; -				mVertices[i].mNormal = norm; -				mVertices[mNumS*(mNumT-1)+i].mNormal = norm; +				LLVector4a n; +				n.setAdd(norm[i], norm[mNumS*(mNumT-1)+i]); +				norm[i] = n; +				norm[mNumS*(mNumT-1)+i] = n;  			}  		} @@ -5365,9 +7422,10 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  		{ //wrap normals on S  			for (S32 i = 0; i < mNumT; i++)  			{ -				LLVector3 
norm = mVertices[mNumS*i].mNormal + mVertices[mNumS*i+mNumS-1].mNormal; -				mVertices[mNumS * i].mNormal = norm; -				mVertices[mNumS * i+mNumS-1].mNormal = norm; +				LLVector4a n; +				n.setAdd(norm[mNumS*i], norm[mNumS*i+mNumS-1]); +				norm[mNumS * i] = n; +				norm[mNumS * i+mNumS-1] = n;  			}  		} @@ -5378,7 +7436,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  			{ //all lower S have same normal  				for (S32 i = 0; i < mNumT; i++)  				{ -					mVertices[mNumS*i].mNormal = LLVector3(1,0,0); +					norm[mNumS*i].set(1,0,0);  				}  			} @@ -5386,12 +7444,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  			{ //all upper S have same normal  				for (S32 i = 0; i < mNumT; i++)  				{ -					mVertices[mNumS*i+mNumS-1].mNormal = LLVector3(-1,0,0); +					norm[mNumS*i+mNumS-1].set(-1,0,0);  				}  			}  		}  	} -	  	else  // logic for sculpt volumes  	{  		BOOL average_poles = FALSE; @@ -5414,30 +7471,33 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  		{  			// average normals for north pole -			LLVector3 average(0.0, 0.0, 0.0); +			LLVector4a average; +			average.clear(); +  			for (S32 i = 0; i < mNumS; i++)  			{ -				average += mVertices[i].mNormal; +				average.add(norm[i]);  			}  			// set average  			for (S32 i = 0; i < mNumS; i++)  			{ -				mVertices[i].mNormal = average; +				norm[i] = average;  			}  			// average normals for south pole -			average = LLVector3(0.0, 0.0, 0.0); +			average.clear(); +  			for (S32 i = 0; i < mNumS; i++)  			{ -				average += mVertices[i + mNumS * (mNumT - 1)].mNormal; +				average.add(norm[i + mNumS * (mNumT - 1)]);  			}  			// set average  			for (S32 i = 0; i < mNumS; i++)  			{ -				mVertices[i + mNumS * (mNumT - 1)].mNormal = average; +				norm[i + mNumS * (mNumT - 1)] = average;  			}  		} @@ -5447,23 +7507,22 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  		{  			for (S32 i = 0; i < mNumT; i++)  			{ -				LLVector3 norm = mVertices[mNumS*i].mNormal + mVertices[mNumS*i+mNumS-1].mNormal; -				mVertices[mNumS * i].mNormal = norm; -				mVertices[mNumS * i+mNumS-1].mNormal = norm; +				LLVector4a n; +				n.setAdd(norm[mNumS*i], norm[mNumS*i+mNumS-1]); +				norm[mNumS * i] = n; +				norm[mNumS * i+mNumS-1] = n;  			}  		} - -		  		if (wrap_t)  		{  			for (S32 i = 0; i < mNumS; i++)  			{ -				LLVector3 norm = mVertices[i].mNormal + mVertices[mNumS*(mNumT-1)+i].mNormal; -				mVertices[i].mNormal = norm; -				mVertices[mNumS*(mNumT-1)+i].mNormal = norm; +				LLVector4a n; +				n.setAdd(norm[i], norm[mNumS*(mNumT-1)+i]); +				norm[i] = n; +				norm[mNumS*(mNumT-1)+i] = n;  			} -			  		}  	} @@ -5473,41 +7532,51 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  // Finds binormal based on three vertices with texture coordinates.  // Fills in dummy values if the triangle has degenerate texture coordinates. 
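
[Editorial sketch] For sphere-stitched sculpts, the pole handling above replaces every normal in the first and last vertex rows with that row's summed normal, so each pole shades as a single point once the normals are normalized. A minimal sketch of that step (Vec3 is again a stand-in type):

    #include <cstddef>
    #include <vector>

    struct Vec3 { float x = 0.f, y = 0.f, z = 0.f; };

    // Give all vertices of one grid row (a collapsed pole) the same accumulated normal.
    inline void average_row_normals(std::vector<Vec3>& normals,
                                    size_t row_start, size_t row_len)
    {
        Vec3 avg;
        for (size_t i = 0; i < row_len; ++i)
        {
            avg.x += normals[row_start + i].x;
            avg.y += normals[row_start + i].y;
            avg.z += normals[row_start + i].z;
        }
        for (size_t i = 0; i < row_len; ++i)
        {
            normals[row_start + i] = avg;   // caller normalizes afterwards
        }
    }
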
-LLVector3 calc_binormal_from_triangle(  -	const LLVector3& pos0, +void calc_binormal_from_triangle(LLVector4a& binormal, + +	const LLVector4a& pos0,  	const LLVector2& tex0, -	const LLVector3& pos1, +	const LLVector4a& pos1,  	const LLVector2& tex1, -	const LLVector3& pos2, +	const LLVector4a& pos2,  	const LLVector2& tex2)  { -	LLVector3 rx0( pos0.mV[VX], tex0.mV[VX], tex0.mV[VY] ); -	LLVector3 rx1( pos1.mV[VX], tex1.mV[VX], tex1.mV[VY] ); -	LLVector3 rx2( pos2.mV[VX], tex2.mV[VX], tex2.mV[VY] ); +	LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] ); +	LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] ); +	LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] ); -	LLVector3 ry0( pos0.mV[VY], tex0.mV[VX], tex0.mV[VY] ); -	LLVector3 ry1( pos1.mV[VY], tex1.mV[VX], tex1.mV[VY] ); -	LLVector3 ry2( pos2.mV[VY], tex2.mV[VX], tex2.mV[VY] ); +	LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] ); +	LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] ); +	LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] ); -	LLVector3 rz0( pos0.mV[VZ], tex0.mV[VX], tex0.mV[VY] ); -	LLVector3 rz1( pos1.mV[VZ], tex1.mV[VX], tex1.mV[VY] ); -	LLVector3 rz2( pos2.mV[VZ], tex2.mV[VX], tex2.mV[VY] ); +	LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] ); +	LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] ); +	LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] ); -	LLVector3 r0 = (rx0 - rx1) % (rx0 - rx2); -	LLVector3 r1 = (ry0 - ry1) % (ry0 - ry2); -	LLVector3 r2 = (rz0 - rz1) % (rz0 - rz2); +	LLVector4a lhs, rhs; + +	LLVector4a r0;  +	lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2); +	r0.setCross3(lhs, rhs); +		 +	LLVector4a r1; +	lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2); +	r1.setCross3(lhs, rhs); + +	LLVector4a r2; +	lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2); +	r2.setCross3(lhs, rhs); -	if( r0.mV[VX] && r1.mV[VX] && r2.mV[VX] ) +	if( r0[VX] && r1[VX] && r2[VX] )  	{ -		LLVector3 binormal( -				-r0.mV[VZ] / r0.mV[VX], -				-r1.mV[VZ] / r1.mV[VX], -				-r2.mV[VZ] / r2.mV[VX]); +		binormal.set( +				-r0[VZ] / r0[VX], +				-r1[VZ] / r1[VX], +				-r2[VZ] / r2[VX]);  		// binormal.normVec(); -		return binormal;  	}  	else  	{ -		return LLVector3( 0, 1 , 0 ); +		binormal.set( 0, 1 , 0 );  	}  } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index d48a79ee46..76cf9de613 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -34,8 +34,13 @@ class LLPathParams;  class LLVolumeParams;  class LLProfile;  class LLPath; + +template <class T> class LLOctreeNode; + +class LLVector4a;  class LLVolumeFace;  class LLVolume; +class LLVolumeTriangle;  #include "lldarray.h"  #include "lluuid.h" @@ -43,6 +48,8 @@ class LLVolume;  //#include "vmath.h"  #include "v2math.h"  #include "v3math.h" +#include "v3dmath.h" +#include "v4math.h"  #include "llquaternion.h"  #include "llstrider.h"  #include "v4coloru.h" @@ -177,12 +184,14 @@ const U8 LL_SCULPT_TYPE_SPHERE    = 1;  const U8 LL_SCULPT_TYPE_TORUS     = 2;  const U8 LL_SCULPT_TYPE_PLANE     = 3;  const U8 LL_SCULPT_TYPE_CYLINDER  = 4; - -const U8 LL_SCULPT_TYPE_MASK      = LL_SCULPT_TYPE_SPHERE | LL_SCULPT_TYPE_TORUS | LL_SCULPT_TYPE_PLANE | LL_SCULPT_TYPE_CYLINDER; +const U8 LL_SCULPT_TYPE_MESH      = 5; +const U8 LL_SCULPT_TYPE_MASK      = LL_SCULPT_TYPE_SPHERE | LL_SCULPT_TYPE_TORUS | LL_SCULPT_TYPE_PLANE | +	LL_SCULPT_TYPE_CYLINDER | LL_SCULPT_TYPE_MESH;  const U8 LL_SCULPT_FLAG_INVERT    = 64;  const U8 LL_SCULPT_FLAG_MIRROR    = 128; +const S32 LL_SCULPT_MESH_MAX_FACES = 8;  class LLProfileParams  { @@ -569,6 +578,9 @@ public:  	BOOL 
importLegacyStream(std::istream& input_stream);  	BOOL exportLegacyStream(std::ostream& output_stream) const; +	LLSD sculptAsLLSD() const; +	bool sculptFromLLSD(LLSD& sd); +	  	LLSD asLLSD() const;  	operator LLSD() const { return asLLSD(); }  	bool fromLLSD(LLSD& sd); @@ -628,7 +640,8 @@ public:  	const F32&  getSkew() const			{ return mPathParams.getSkew();			}  	const LLUUID& getSculptID() const	{ return mSculptID;						}  	const U8& getSculptType() const     { return mSculptType;                   } - +	bool isSculpt() const; +	bool isMeshSculpt() const;  	BOOL isConvex() const;  	// 'begin' and 'end' should be in range [0, 1] (they will be clamped) @@ -677,6 +690,9 @@ public:  	BOOL isFlat(S32 face) const							{ return (mFaces[face].mCount == 2); }  	BOOL isOpen() const									{ return mOpen; }  	void setDirty()										{ mDirty     = TRUE; } + +	static S32 getNumPoints(const LLProfileParams& params, BOOL path_open, F32 detail = 1.0f, S32 split = 0, +				  BOOL is_sculpted = FALSE, S32 sculpt_size = 0);  	BOOL generate(const LLProfileParams& params, BOOL path_open, F32 detail = 1.0f, S32 split = 0,  				  BOOL is_sculpted = FALSE, S32 sculpt_size = 0);  	BOOL isConcave() const								{ return mConcave; } @@ -701,6 +717,7 @@ public:  protected:  	void genNormals(const LLProfileParams& params); +	static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0);  	void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0);  	Face* addHole(const LLProfileParams& params, BOOL flat, F32 sides, F32 offset, F32 box_hollow, F32 ang_scale, S32 split = 0); @@ -743,6 +760,9 @@ public:  	virtual ~LLPath(); +	static S32 getNumPoints(const LLPathParams& params, F32 detail); +	static S32 getNumNGonPoints(const LLPathParams& params, S32 sides, F32 offset=0.0f, F32 end_scale = 1.f, F32 twist_scale = 1.f); +  	void genNGon(const LLPathParams& params, S32 sides, F32 offset=0.0f, F32 end_scale = 1.f, F32 twist_scale = 1.f);  	virtual BOOL generate(const LLPathParams& params, F32 detail=1.0f, S32 split = 0,  						  BOOL is_sculpted = FALSE, S32 sculpt_size = 0); @@ -779,30 +799,88 @@ public:  class LLVolumeFace  {  public: -	LLVolumeFace() :  -		mID(0), -		mTypeMask(0), -		mHasBinormals(FALSE), -		mBeginS(0), -		mBeginT(0), -		mNumS(0), -		mNumT(0) +	class VertexData  	{ -	} +		enum  +		{ +			POSITION = 0, +			NORMAL = 1 +		}; + +	private: +		void init(); +	public: +		VertexData(); +		VertexData(const VertexData& rhs); +		const VertexData& operator=(const VertexData& rhs); + +		~VertexData(); +		LLVector4a& getPosition(); +		LLVector4a& getNormal(); +		const LLVector4a& getPosition() const; +		const LLVector4a& getNormal() const; +		void setPosition(const LLVector4a& pos); +		void setNormal(const LLVector4a& norm); +		 + +		LLVector2 mTexCoord; + +		bool operator<(const VertexData& rhs) const; +		bool operator==(const VertexData& rhs) const; +		bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const; + +	private: +		LLVector4a* mData; +	}; + +	LLVolumeFace(); +	LLVolumeFace(const LLVolumeFace& src); +	LLVolumeFace& operator=(const LLVolumeFace& rhs); + +	~LLVolumeFace(); +private: +	void freeData(); +public:  	BOOL create(LLVolume* volume, BOOL partial_build = FALSE);  	void createBinormals(); -	void makeTriStrip(); -	class VertexData +	void appendFace(const LLVolumeFace& face, LLMatrix4& transform, LLMatrix4& normal_tranform); + +	void 
resizeVertices(S32 num_verts); +	void allocateBinormals(S32 num_verts); +	void allocateWeights(S32 num_verts); +	void resizeIndices(S32 num_indices); +	void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx); + +	void pushVertex(const VertexData& cv); +	void pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc); +	void pushIndex(const U16& idx); + +	void swapData(LLVolumeFace& rhs); + +	void getVertexData(U16 indx, LLVolumeFace::VertexData& cv); + +	class VertexMapData : public LLVolumeFace::VertexData  	{  	public: -		LLVector3 mPosition; -		LLVector3 mNormal; -		LLVector3 mBinormal; -		LLVector2 mTexCoord; +		U16 mIndex; + +		bool operator==(const LLVolumeFace::VertexData& rhs) const; + +		struct ComparePosition +		{ +			bool operator()(const LLVector3& a, const LLVector3& b) const; +		}; + +		typedef std::map<LLVector3, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap;  	}; +	void optimize(F32 angle_cutoff = 2.f); +	void cacheOptimize(); + +	void createOctree(F32 scaler = 0.25f, const LLVector4a& center = LLVector4a(0,0,0), const LLVector4a& size = LLVector4a(0.5f,0.5f,0.5f)); +  	enum  	{  		SINGLE_MASK =	0x0001, @@ -821,22 +899,35 @@ public:  public:  	S32 mID;  	U32 mTypeMask; -	LLVector3 mCenter; -	BOOL mHasBinormals; - +	  	// Only used for INNER/OUTER faces  	S32 mBeginS;  	S32 mBeginT;  	S32 mNumS;  	S32 mNumT; -	LLVector3 mExtents[2]; //minimum and maximum point of face +	LLVector4a* mExtents; //minimum and maximum point of face +	LLVector4a* mCenter; +	LLVector2   mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face. + +	S32 mNumVertices; +	S32 mNumIndices; + +	LLVector4a* mPositions; +	LLVector4a* mNormals; +	LLVector4a* mBinormals; +	LLVector2*  mTexCoords; +	U16* mIndices; -	std::vector<VertexData> mVertices; -	std::vector<U16>	mIndices; -	std::vector<U16>	mTriStrip;  	std::vector<S32>	mEdge; +	//list of skin weights for rigged volumes +	// format is mWeights[vertex_index].mV[influence] = <joint_index>.<weight> +	// mWeights.size() should be empty or match mVertices.size()   +	LLVector4a* mWeights; + +	LLOctreeNode<LLVolumeTriangle>* mOctree; +  private:  	BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE);  	BOOL createCap(LLVolume* volume, BOOL partial_build = FALSE); @@ -847,8 +938,7 @@ class LLVolume : public LLRefCount  {  	friend class LLVolumeLODGroup; -private: -	LLVolume(const LLVolume&);  // Don't implement +protected:  	~LLVolume(); // use unref  public: @@ -870,9 +960,10 @@ public:  	U8 getProfileType()	const								{ return mParams.getProfileParams().getCurveType(); }  	U8 getPathType() const									{ return mParams.getPathParams().getCurveType(); } -	S32	getNumFaces() const									{ return (S32)mProfilep->mFaces.size(); } +	S32	getNumFaces() const;  	S32 getNumVolumeFaces() const							{ return mVolumeFaces.size(); }  	F32 getDetail() const									{ return mDetail; } +	F32 getSurfaceArea() const								{ return mSurfaceArea; }  	const LLVolumeParams& getParams() const					{ return mParams; }  	LLVolumeParams getCopyOfParams() const					{ return mParams; }  	const LLProfile& getProfile() const						{ return *mProfilep; } @@ -892,15 +983,18 @@ public:  	BOOL isUnique() const									{ return mUnique; }  	S32 getSculptLevel() const                              { return mSculptLevel; } -	 +	void setSculptLevel(S32 level)							{ mSculptLevel = level; } +  	S32 *getTriangleIndices(U32 &num_indices) const;  	// returns number of triangle 
indeces required for path/profile mesh  	S32 getNumTriangleIndices() const; +	static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts); + +	S32 getNumTriangles(S32* vcount = NULL) const;  	void generateSilhouetteVertices(std::vector<LLVector3> &vertices,   									std::vector<LLVector3> &normals,  -									std::vector<S32> &segments,   									const LLVector3& view_vec,  									const LLMatrix4& mat,  									const LLMatrix3& norm_mat, @@ -916,6 +1010,13 @@ public:  							 LLVector3* normal = NULL,               // return the surface normal at the intersection point  							 LLVector3* bi_normal = NULL             // return the surface bi-normal at the intersection point  		); + +	S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end,  +								   S32 face = 1, +								   LLVector3* intersection = NULL, +								   LLVector2* tex_coord = NULL, +								   LLVector3* normal = NULL, +								   LLVector3* bi_normal = NULL);  	// The following cleans up vertices and triangles,  	// getting rid of degenerate triangles and duplicate vertices, @@ -937,11 +1038,14 @@ public:  	friend std::ostream& operator<<(std::ostream &s, const LLVolume *volumep);		// HACK to bypass Windoze confusion over   																				// conversion if *(LLVolume*) to LLVolume&  	const LLVolumeFace &getVolumeFace(const S32 f) const {return mVolumeFaces[f];} // DO NOT DELETE VOLUME WHILE USING THIS REFERENCE, OR HOLD A POINTER TO THIS VOLUMEFACE - +	  	U32					mFaceMask;			// bit array of which faces exist in this volume  	LLVector3			mLODScaleBias;		// vector for biasing LOD based on scale  	void sculpt(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data, S32 sculpt_level); +	void copyVolumeFaces(const LLVolume* volume); +	void cacheOptimize(); +  private:  	void sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data, U8 sculpt_type);  	F32 sculptGetSurfaceArea(); @@ -952,35 +1056,57 @@ private:  protected:  	BOOL generate();  	void createVolumeFaces(); +public: +	virtual bool unpackVolumeFaces(std::istream& is, S32 size); + +	virtual void setMeshAssetLoaded(BOOL loaded); +	virtual BOOL isMeshAssetLoaded();   protected:  	BOOL mUnique;  	F32 mDetail;  	S32 mSculptLevel; +	F32 mSurfaceArea; //unscaled surface area +	BOOL mIsMeshAssetLoaded;  	LLVolumeParams mParams;  	LLPath *mPathp;  	LLProfile *mProfilep;  	std::vector<Point> mMesh; - +	  	BOOL mGenerateSingleFace;  	typedef std::vector<LLVolumeFace> face_list_t;  	face_list_t mVolumeFaces; + +public: +	LLVector4a* mHullPoints; +	U16* mHullIndices; +	S32 mNumHullPoints; +	S32 mNumHullIndices;  };  std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params); -LLVector3 calc_binormal_from_triangle( -		const LLVector3& pos0, +void calc_binormal_from_triangle( +		LLVector4a& binormal, +		const LLVector4a& pos0,  		const LLVector2& tex0, -		const LLVector3& pos1, +		const LLVector4a& pos1,  		const LLVector2& tex1, -		const LLVector3& pos2, +		const LLVector4a& pos2,  		const LLVector2& tex2); +BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size);  BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size); +BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size); +  BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const 
LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, -							F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided); +							F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); + +BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, +							F32& intersection_a, F32& intersection_b, F32& intersection_t); +BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, +							F32& intersection_a, F32& intersection_b, F32& intersection_t); diff --git a/indra/llmath/llvolumemgr.cpp b/indra/llmath/llvolumemgr.cpp index 88c195936c..c60b750088 100644 --- a/indra/llmath/llvolumemgr.cpp +++ b/indra/llmath/llvolumemgr.cpp @@ -314,7 +314,7 @@ BOOL LLVolumeLODGroup::derefLOD(LLVolume *volumep)  		{  			llassert_always(mLODRefs[i] > 0);  			mLODRefs[i]--; -#if 1 // SJB: Possible opt: keep other lods around +#if 0 // SJB: Possible opt: keep other lods around  			if (!mLODRefs[i])  			{  				mVolumeLODs[i] = NULL; @@ -369,6 +369,19 @@ F32 LLVolumeLODGroup::getVolumeScaleFromDetail(const S32 detail)  	return mDetailScales[detail];  } +S32 LLVolumeLODGroup::getVolumeDetailFromScale(const F32 detail) +{ +	for (S32 i = 1; i < 4; i++) +	{ +		if (mDetailScales[i] > detail) +		{ +			return i-1; +		} +	} + +	return 3; +} +  F32 LLVolumeLODGroup::dump()  {  	F32 usage = 0.f; diff --git a/indra/llmath/llvolumemgr.h b/indra/llmath/llvolumemgr.h index 5257da2693..c75906f675 100644 --- a/indra/llmath/llvolumemgr.h +++ b/indra/llmath/llvolumemgr.h @@ -53,6 +53,7 @@ public:  	static S32 getDetailFromTan(const F32 tan_angle);  	static void getDetailProximity(const F32 tan_angle, F32 &to_lower, F32& to_higher);  	static F32 getVolumeScaleFromDetail(const S32 detail); +	static S32 getVolumeDetailFromScale(F32 scale);  	LLVolume* refLOD(const S32 detail);  	BOOL derefLOD(LLVolume *volumep); diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp new file mode 100644 index 0000000000..b5a935c2b5 --- /dev/null +++ b/indra/llmath/llvolumeoctree.cpp @@ -0,0 +1,256 @@ +/**  + + * @file llvolumeoctree.cpp + * + * $LicenseInfo:firstyear=2002&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#include "llvolumeoctree.h" +#include "llvector4a.h" + +BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size) +{ +	LLVector4a fAWdU; +	LLVector4a dir; +	LLVector4a diff; + +	dir.setSub(end, start); +	dir.mul(0.5f); + +	diff.setAdd(end,start); +	diff.mul(0.5f); +	diff.sub(center); +	fAWdU.setAbs(dir);  + +	LLVector4a rhs; +	rhs.setAdd(size, fAWdU); + +	LLVector4a lhs; +	lhs.setAbs(diff); + +	U32 grt = lhs.greaterThan(rhs).getGatheredBits(); + +	if (grt & 0x7) +	{ +		return false; +	} +	 +	LLVector4a f; +	f.setCross3(dir, diff); +	f.setAbs(f); + +	LLVector4a v0, v1; + +	v0 = _mm_shuffle_ps(size, size,_MM_SHUFFLE(3,0,0,1)); +	v1 = _mm_shuffle_ps(fAWdU, fAWdU, _MM_SHUFFLE(3,1,2,2)); +	lhs.setMul(v0, v1); + +	v0 = _mm_shuffle_ps(size, size, _MM_SHUFFLE(3,1,2,2)); +	v1 = _mm_shuffle_ps(fAWdU, fAWdU, _MM_SHUFFLE(3,0,0,1)); +	rhs.setMul(v0, v1); +	rhs.add(lhs); +	 +	grt = f.greaterThan(rhs).getGatheredBits(); + +	return (grt & 0x7) ? false : true; +} + + +LLVolumeOctreeListener::LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node) +{ +	node->addListener(this); +} + +LLVolumeOctreeListener::~LLVolumeOctreeListener() +{ + +} +	 +void LLVolumeOctreeListener::handleChildAddition(const LLOctreeNode<LLVolumeTriangle>* parent,  +	LLOctreeNode<LLVolumeTriangle>* child) +{ +	new LLVolumeOctreeListener(child); +} + + +LLOctreeTriangleRayIntersect::LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir,  +							   const LLVolumeFace* face, F32* closest_t, +							   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) +   : mFace(face), +     mStart(start), +	 mDir(dir), +	 mIntersection(intersection), +	 mTexCoord(tex_coord), +	 mNormal(normal), +	 mBinormal(bi_normal), +	 mClosestT(closest_t), +	 mHitFace(false) +{ +	mEnd.setAdd(mStart, mDir); +} + +void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode<LLVolumeTriangle>* node) +{ +	LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) node->getListener(0); + +	/*const F32* start = mStart.getF32(); +	const F32* end = mEnd.getF32(); +	const F32* center = vl->mBounds[0].getF32(); +	const F32* size = vl->mBounds[1].getF32();*/ + +	//if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) +	if (LLLineSegmentBoxIntersect(mStart.getF32ptr(), mEnd.getF32ptr(), vl->mBounds[0].getF32ptr(), vl->mBounds[1].getF32ptr())) +	{ +		node->accept(this); +		for (S32 i = 0; i < node->getChildCount(); ++i) +		{ +			traverse(node->getChild(i)); +		} +	} +} + +void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode<LLVolumeTriangle>* node) +{ +	for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter =  +			node->getData().begin(); iter != node->getData().end(); ++iter) +	{ +		const LLVolumeTriangle* tri = *iter; + +		F32 a, b, t; +		 +		if (LLTriangleRayIntersect(*tri->mV[0], *tri->mV[1], *tri->mV[2], +				mStart, mDir, a, b, t)) +		{ +			if ((t >= 0.f) &&      // if hit is after start +				(t <= 1.f) &&      // and before end +				(t < *mClosestT))   // and this hit is closer +			{ +				*mClosestT = t; +				mHitFace = true; + +				if (mIntersection != NULL) +				{ +					
LLVector4a intersect = mDir; +					intersect.mul(*mClosestT); +					intersect.add(mStart); +					mIntersection->set(intersect.getF32ptr()); +				} + + +				if (mTexCoord != NULL) +				{ +					LLVector2* tc = (LLVector2*) mFace->mTexCoords; +					*mTexCoord = ((1.f - a - b)  * tc[tri->mIndex[0]] + +						a              * tc[tri->mIndex[1]] + +						b              * tc[tri->mIndex[2]]); + +				} + +				if (mNormal != NULL) +				{ +					LLVector4* norm = (LLVector4*) mFace->mNormals; + +					*mNormal    = ((1.f - a - b)  * LLVector3(norm[tri->mIndex[0]]) +  +						a              * LLVector3(norm[tri->mIndex[1]]) + +						b              * LLVector3(norm[tri->mIndex[2]])); +				} + +				if (mBinormal != NULL) +				{ +					LLVector4* binormal = (LLVector4*) mFace->mBinormals; +					*mBinormal = ((1.f - a - b)  * LLVector3(binormal[tri->mIndex[0]]) +  +							a              * LLVector3(binormal[tri->mIndex[1]]) + +							b              * LLVector3(binormal[tri->mIndex[2]])); +				} +			} +		} +	} +} + +const LLVector4a& LLVolumeTriangle::getPositionGroup() const +{ +	return mPositionGroup; +} + +const F32& LLVolumeTriangle::getBinRadius() const +{ +	return mRadius; +} + + +//TEST CODE + +void LLVolumeOctreeValidate::visit(const LLOctreeNode<LLVolumeTriangle>* branch) +{ +	LLVolumeOctreeListener* node = (LLVolumeOctreeListener*) branch->getListener(0); + +	//make sure bounds matches extents +	LLVector4a& min = node->mExtents[0]; +	LLVector4a& max = node->mExtents[1]; + +	LLVector4a& center = node->mBounds[0]; +	LLVector4a& size = node->mBounds[1]; + +	LLVector4a test_min, test_max; +	test_min.setSub(center, size); +	test_max.setAdd(center, size); + +	if (!test_min.equals3(min, 0.001f) || +		!test_max.equals3(max, 0.001f)) +	{ +		llerrs << "Bad bounding box data found." << llendl; +	} + +	test_min.sub(LLVector4a(0.001f)); +	test_max.add(LLVector4a(0.001f)); + +	for (U32 i = 0; i < branch->getChildCount(); ++i) +	{ +		LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0); + +		//make sure all children fit inside this node +		if (child->mExtents[0].lessThan(test_min).areAnySet(LLVector4Logical::MASK_XYZ) || +			child->mExtents[1].greaterThan(test_max).areAnySet(LLVector4Logical::MASK_XYZ)) +		{ +			llerrs << "Child protrudes from bounding box." << llendl; +		} +	} + +	//children fit, check data +	for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getData().begin();  +			iter != branch->getData().end(); ++iter) +	{ +		const LLVolumeTriangle* tri = *iter; + +		//validate triangle +		for (U32 i = 0; i < 3; i++) +		{ +			if (tri->mV[i]->greaterThan(test_max).areAnySet(LLVector4Logical::MASK_XYZ) || +				tri->mV[i]->lessThan(test_min).areAnySet(LLVector4Logical::MASK_XYZ)) +			{ +				llerrs << "Triangle protrudes from node." << llendl; +			} +		} +	} +} + + diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h new file mode 100644 index 0000000000..688d91dc40 --- /dev/null +++ b/indra/llmath/llvolumeoctree.h @@ -0,0 +1,134 @@ +/**  + * @file llvolumeoctree.h + * @brief LLVolume octree classes. + * + * $LicenseInfo:firstyear=2002&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. 
+ *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef LL_LLVOLUME_OCTREE_H +#define LL_LLVOLUME_OCTREE_H + +#include "linden_common.h" +#include "llmemory.h" + +#include "lloctree.h" +#include "llvolume.h" +#include "llvector4a.h" + +class LLVolumeTriangle : public LLRefCount +{ +public: +	LLVolumeTriangle() +	{ +		 +	} + +	LLVolumeTriangle(const LLVolumeTriangle& rhs) +	{ +		*this = rhs; +	} + +	const LLVolumeTriangle& operator=(const LLVolumeTriangle& rhs) +	{ +		llerrs << "Illegal operation!" << llendl; +		return *this; +	} + +	~LLVolumeTriangle() +	{ +	 +	} + +	LLVector4a mPositionGroup; + +	const LLVector4a* mV[3]; +	U16 mIndex[3]; + +	F32 mRadius; + +	virtual const LLVector4a& getPositionGroup() const; +	virtual const F32& getBinRadius() const; +}; + +class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle> +{ +public: +	 +	LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node); +	~LLVolumeOctreeListener(); +	 +	LLVolumeOctreeListener(const LLVolumeOctreeListener& rhs) +	{ +		*this = rhs; +	} + +	const LLVolumeOctreeListener& operator=(const LLVolumeOctreeListener& rhs) +	{ +		llerrs << "Illegal operation!" << llendl; +		return *this; +	} + +	 //LISTENER FUNCTIONS +	virtual void handleChildAddition(const LLOctreeNode<LLVolumeTriangle>* parent,  +		LLOctreeNode<LLVolumeTriangle>* child); +	virtual void handleStateChange(const LLTreeNode<LLVolumeTriangle>* node) { } +	virtual void handleChildRemoval(const LLOctreeNode<LLVolumeTriangle>* parent,  +			const LLOctreeNode<LLVolumeTriangle>* child) {	} +	virtual void handleInsertion(const LLTreeNode<LLVolumeTriangle>* node, LLVolumeTriangle* tri) { } +	virtual void handleRemoval(const LLTreeNode<LLVolumeTriangle>* node, LLVolumeTriangle* tri) { } +	virtual void handleDestruction(const LLTreeNode<LLVolumeTriangle>* node) { } +	 + +public: +	LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) +	LLVector4a mExtents[2]; // extents (min, max) of this node and all its children +}; + +class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle> +{ +public: +	const LLVolumeFace* mFace; +	LLVector4a mStart; +	LLVector4a mDir; +	LLVector4a mEnd; +	LLVector3* mIntersection; +	LLVector2* mTexCoord; +	LLVector3* mNormal; +	LLVector3* mBinormal; +	F32* mClosestT; +	bool mHitFace; + +	LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir,  +								   const LLVolumeFace* face, F32* closest_t, +								   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal); + +	void traverse(const LLOctreeNode<LLVolumeTriangle>* node); + +	virtual void visit(const LLOctreeNode<LLVolumeTriangle>* node); +}; + +class LLVolumeOctreeValidate : public LLOctreeTraveler<LLVolumeTriangle> +{ +	virtual void visit(const LLOctreeNode<LLVolumeTriangle>* branch); +}; + +#endif diff --git a/indra/llmath/m4math.cpp b/indra/llmath/m4math.cpp index 946b1553fe..6a1b4143cf 100644 --- 
a/indra/llmath/m4math.cpp +++ b/indra/llmath/m4math.cpp @@ -215,8 +215,33 @@ const LLMatrix4&	LLMatrix4::transpose()  F32 LLMatrix4::determinant() const  { -	llerrs << "Not implemented!" << llendl; -	return 0.f; +	F32 value = +	    mMatrix[0][3] * mMatrix[1][2] * mMatrix[2][1] * mMatrix[3][0] - +	    mMatrix[0][2] * mMatrix[1][3] * mMatrix[2][1] * mMatrix[3][0] - +	    mMatrix[0][3] * mMatrix[1][1] * mMatrix[2][2] * mMatrix[3][0] + +	    mMatrix[0][1] * mMatrix[1][3] * mMatrix[2][2] * mMatrix[3][0] + +	    mMatrix[0][2] * mMatrix[1][1] * mMatrix[2][3] * mMatrix[3][0] - +	    mMatrix[0][1] * mMatrix[1][2] * mMatrix[2][3] * mMatrix[3][0] - +	    mMatrix[0][3] * mMatrix[1][2] * mMatrix[2][0] * mMatrix[3][1] + +	    mMatrix[0][2] * mMatrix[1][3] * mMatrix[2][0] * mMatrix[3][1] + +	    mMatrix[0][3] * mMatrix[1][0] * mMatrix[2][2] * mMatrix[3][1] - +	    mMatrix[0][0] * mMatrix[1][3] * mMatrix[2][2] * mMatrix[3][1] - +	    mMatrix[0][2] * mMatrix[1][0] * mMatrix[2][3] * mMatrix[3][1] + +	    mMatrix[0][0] * mMatrix[1][2] * mMatrix[2][3] * mMatrix[3][1] + +	    mMatrix[0][3] * mMatrix[1][1] * mMatrix[2][0] * mMatrix[3][2] - +	    mMatrix[0][1] * mMatrix[1][3] * mMatrix[2][0] * mMatrix[3][2] - +	    mMatrix[0][3] * mMatrix[1][0] * mMatrix[2][1] * mMatrix[3][2] + +	    mMatrix[0][0] * mMatrix[1][3] * mMatrix[2][1] * mMatrix[3][2] + +	    mMatrix[0][1] * mMatrix[1][0] * mMatrix[2][3] * mMatrix[3][2] - +	    mMatrix[0][0] * mMatrix[1][1] * mMatrix[2][3] * mMatrix[3][2] - +	    mMatrix[0][2] * mMatrix[1][1] * mMatrix[2][0] * mMatrix[3][3] + +	    mMatrix[0][1] * mMatrix[1][2] * mMatrix[2][0] * mMatrix[3][3] + +	    mMatrix[0][2] * mMatrix[1][0] * mMatrix[2][1] * mMatrix[3][3] - +	    mMatrix[0][0] * mMatrix[1][2] * mMatrix[2][1] * mMatrix[3][3] - +	    mMatrix[0][1] * mMatrix[1][0] * mMatrix[2][2] * mMatrix[3][3] + +		mMatrix[0][0] * mMatrix[1][1] * mMatrix[2][2] * mMatrix[3][3]; + +	return value;  }  // Only works for pure orthonormal, homogeneous transform matrices. @@ -422,6 +447,17 @@ const LLMatrix4&  	LLMatrix4::initRotTrans(const LLQuaternion &q, const LLVector  	return (*this);  } +const LLMatrix4& LLMatrix4::initScale(const LLVector3 &scale) +{ +	setIdentity(); + +	mMatrix[VX][VX] = scale.mV[VX]; +	mMatrix[VY][VY] = scale.mV[VY]; +	mMatrix[VZ][VZ] = scale.mV[VZ]; +	 +	return (*this); +} +  const LLMatrix4& LLMatrix4::initAll(const LLVector3 &scale, const LLQuaternion &q, const LLVector3 &pos)  {  	F32		sx, sy, sz; @@ -642,37 +678,6 @@ const LLMatrix4&  	LLMatrix4::initMatrix(const LLMatrix3 &mat, const LLVector4 &  // LLMatrix4 Operators - -/* Not implemented to help enforce code consistency with the syntax of -   row-major notation.  This is a Good Thing. 
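The determinant() body above replaces the old "Not implemented" llerrs stub with a direct expansion of all 24 signed permutation terms of the 4x4 matrix, and initScale() fills the diagonal from an LLVector3. A quick sanity check ties the two together: the determinant of a pure scale matrix should be the product of the scale factors. The snippet is a hypothetical sketch, not part of the change set:

#include "linden_common.h"
#include "llmath.h"
#include "m4math.h"
#include "v3math.h"

// Hypothetical sanity check for the new determinant()/initScale(); not in the patch.
bool determinant_sanity_check()
{
    LLMatrix4 identity;
    identity.setIdentity();

    LLMatrix4 scale;
    scale.initScale(LLVector3(2.f, 3.f, 4.f));           // diag(2, 3, 4, 1)

    return is_approx_equal(identity.determinant(), 1.f)  // det(I) == 1
        && is_approx_equal(scale.determinant(), 24.f);   // 2 * 3 * 4
}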
-LLVector4 operator*(const LLMatrix4 &a, const LLVector4 &b) -{ -	// Operate "to the right" on column-vector b -	LLVector4	vec; -	vec.mV[VX] = a.mMatrix[VX][VX] * b.mV[VX] +  -				 a.mMatrix[VY][VX] * b.mV[VY] +  - 				 a.mMatrix[VZ][VX] * b.mV[VZ] + -				 a.mMatrix[VW][VX] * b.mV[VW]; - -	vec.mV[VY] = a.mMatrix[VX][VY] * b.mV[VX] +  -				 a.mMatrix[VY][VY] * b.mV[VY] +  -				 a.mMatrix[VZ][VY] * b.mV[VZ] + -				 a.mMatrix[VW][VY] * b.mV[VW]; - -	vec.mV[VZ] = a.mMatrix[VX][VZ] * b.mV[VX] +  -			  	 a.mMatrix[VY][VZ] * b.mV[VY] +  -				 a.mMatrix[VZ][VZ] * b.mV[VZ] + -				 a.mMatrix[VW][VZ] * b.mV[VW]; - -	vec.mV[VW] = a.mMatrix[VX][VW] * b.mV[VX] +  -				 a.mMatrix[VY][VW] * b.mV[VY] +  -				 a.mMatrix[VZ][VW] * b.mV[VZ] + -				 a.mMatrix[VW][VW] * b.mV[VW]; -	return vec; -} -*/ - -  LLVector4 operator*(const LLVector4 &a, const LLMatrix4 &b)  {  	// Operate "to the left" on row-vector a @@ -768,6 +773,23 @@ bool operator!=(const LLMatrix4 &a, const LLMatrix4 &b)  	return FALSE;  } +bool operator<(const LLMatrix4& a, const LLMatrix4 &b) +{ +	U32		i, j; +	for (i = 0; i < NUM_VALUES_IN_MAT4; i++) +	{ +		for (j = 0; j < NUM_VALUES_IN_MAT4; j++) +		{ +			if (a.mMatrix[i][j] != b.mMatrix[i][j]) +			{ +				return a.mMatrix[i][j] < b.mMatrix[i][j]; +			} +		} +	} + +	return false; +} +  const LLMatrix4& operator*=(LLMatrix4 &a, F32 k)  {  	U32		i, j; @@ -807,4 +829,54 @@ std::ostream& operator<<(std::ostream& s, const LLMatrix4 &a)  	return s;  } +LLSD LLMatrix4::getValue() const +{ +	LLSD ret; +	 +	ret[0] = mMatrix[0][0]; +	ret[1] = mMatrix[0][1]; +	ret[2] = mMatrix[0][2]; +	ret[3] = mMatrix[0][3]; + +	ret[4] = mMatrix[1][0]; +	ret[5] = mMatrix[1][1]; +	ret[6] = mMatrix[1][2]; +	ret[7] = mMatrix[1][3]; + +	ret[8] = mMatrix[2][0]; +	ret[9] = mMatrix[2][1]; +	ret[10] = mMatrix[2][2]; +	ret[11] = mMatrix[2][3]; + +	ret[12] = mMatrix[3][0]; +	ret[13] = mMatrix[3][1]; +	ret[14] = mMatrix[3][2]; +	ret[15] = mMatrix[3][3]; + +	return ret; +} + +void LLMatrix4::setValue(const LLSD& data)  +{ +	mMatrix[0][0] = (F32)data[0].asReal(); +	mMatrix[0][1] = (F32)data[1].asReal(); +	mMatrix[0][2] = (F32)data[2].asReal(); +	mMatrix[0][3] = (F32)data[3].asReal(); + +	mMatrix[1][0] = (F32)data[4].asReal(); +	mMatrix[1][1] = (F32)data[5].asReal(); +	mMatrix[1][2] = (F32)data[6].asReal(); +	mMatrix[1][3] = (F32)data[7].asReal(); + +	mMatrix[2][0] = (F32)data[8].asReal(); +	mMatrix[2][1] = (F32)data[9].asReal(); +	mMatrix[2][2] = (F32)data[10].asReal(); +	mMatrix[2][3] = (F32)data[11].asReal(); + +	mMatrix[3][0] = (F32)data[12].asReal(); +	mMatrix[3][1] = (F32)data[13].asReal(); +	mMatrix[3][2] = (F32)data[14].asReal(); +	mMatrix[3][3] = (F32)data[15].asReal(); +} + diff --git a/indra/llmath/m4math.h b/indra/llmath/m4math.h index 6ec9958491..a7dce10397 100644 --- a/indra/llmath/m4math.h +++ b/indra/llmath/m4math.h @@ -119,6 +119,8 @@ public:  	~LLMatrix4(void);										// Destructor +	LLSD getValue() const; +	void setValue(const LLSD&);  	//////////////////////////////  	// @@ -132,6 +134,7 @@ public:  	// various useful matrix functions  	const LLMatrix4& setIdentity();					// Load identity matrix +	bool isIdentity() const;  	const LLMatrix4& setZero();						// Clears matrix to all zeros.  	
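The getValue()/setValue() pair added to LLMatrix4 above flattens the matrix into a 16-element LLSD array ordered by first index (element 4*i + j holds mMatrix[i][j]), so a matrix can ride along in the same LLSD containers used elsewhere in the viewer. A minimal round-trip sketch, hypothetical and not part of the patch:

#include "linden_common.h"
#include "llsd.h"
#include "m4math.h"
#include "v3math.h"

// Hypothetical round-trip check for the new LLSD accessors; not in the patch.
void llsd_round_trip()
{
    LLMatrix4 src;
    src.initScale(LLVector3(2.f, 3.f, 4.f));

    LLSD packed = src.getValue();                 // flat 16-element array
    llassert((F32)packed[5].asReal() == 3.f);     // element [1][1] of the scale matrix

    LLMatrix4 dst;
    dst.setValue(packed);                         // rebuild from the array
    llassert(dst == src);                         // lossless: F32 -> LLSD Real -> F32
}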
const LLMatrix4& initRotation(const F32 angle, const F32 x, const F32 y, const F32 z);	// Calculate rotation matrix by rotating angle radians about (x, y, z) @@ -153,6 +156,7 @@ public:  	const LLMatrix4& initRotTrans(const F32 roll, const F32 pitch, const F32 yaw, const LLVector4 &pos); // Rotation from Euler + translation  	const LLMatrix4& initRotTrans(const LLQuaternion &q, const LLVector4 &pos);	// Set with Quaternion and position +	const LLMatrix4& initScale(const LLVector3 &scale);  	// Set all  	const LLMatrix4& initAll(const LLVector3 &scale, const LLQuaternion &q, const LLVector3 &pos);	 @@ -219,10 +223,7 @@ public:  	// Operators  	// -// Not implemented to enforce code that agrees with symbolic syntax -//		friend LLVector4 operator*(const LLMatrix4 &a, const LLVector4 &b);		// Apply rotation a to vector b - -//	friend inline LLMatrix4 operator*(const LLMatrix4 &a, const LLMatrix4 &b);		// Return a * b +	//	friend inline LLMatrix4 operator*(const LLMatrix4 &a, const LLMatrix4 &b);		// Return a * b  	friend LLVector4 operator*(const LLVector4 &a, const LLMatrix4 &b);		// Return transform of vector a by matrix b  	friend const LLVector3 operator*(const LLVector3 &a, const LLMatrix4 &b);		// Return full transform of a by matrix b  	friend LLVector4 rotate_vector(const LLVector4 &a, const LLMatrix4 &b);	// Rotates a but does not translate @@ -230,6 +231,7 @@ public:  	friend bool operator==(const LLMatrix4 &a, const LLMatrix4 &b);			// Return a == b  	friend bool operator!=(const LLMatrix4 &a, const LLMatrix4 &b);			// Return a != b +	friend bool operator<(const LLMatrix4 &a, const LLMatrix4& b);			// Return a < b  	friend const LLMatrix4& operator+=(LLMatrix4 &a, const LLMatrix4 &b);	// Return a + b  	friend const LLMatrix4& operator-=(LLMatrix4 &a, const LLMatrix4 &b);	// Return a - b @@ -263,6 +265,30 @@ inline const LLMatrix4&	LLMatrix4::setIdentity()  	return (*this);  } +inline bool LLMatrix4::isIdentity() const +{ +	return +		mMatrix[0][0] == 1.f && +		mMatrix[0][1] == 0.f && +		mMatrix[0][2] == 0.f && +		mMatrix[0][3] == 0.f && + +		mMatrix[1][0] == 0.f && +		mMatrix[1][1] == 1.f && +		mMatrix[1][2] == 0.f && +		mMatrix[1][3] == 0.f && + +		mMatrix[2][0] == 0.f && +		mMatrix[2][1] == 0.f && +		mMatrix[2][2] == 1.f && +		mMatrix[2][3] == 0.f && + +		mMatrix[3][0] == 0.f && +		mMatrix[3][1] == 0.f && +		mMatrix[3][2] == 0.f && +		mMatrix[3][3] == 1.f; +} +  /*  inline LLMatrix4 operator*(const LLMatrix4 &a, const LLMatrix4 &b) diff --git a/indra/llmath/tests/llbbox_test.cpp b/indra/llmath/tests/llbbox_test.cpp index 8064ab217d..fd0dbb58fc 100644 --- a/indra/llmath/tests/llbbox_test.cpp +++ b/indra/llmath/tests/llbbox_test.cpp @@ -34,7 +34,7 @@  #define ANGLE                (3.14159265f / 2.0f) -#define APPROX_EQUAL(a, b)   dist_vec((a),(b)) < 1e-5 +#define APPROX_EQUAL(a, b)   (dist_vec_squared((a),(b)) < 1e-10)  namespace tut  { diff --git a/indra/llmath/tests/llquaternion_test.cpp b/indra/llmath/tests/llquaternion_test.cpp index 9e79b299ff..e69010b2d6 100644 --- a/indra/llmath/tests/llquaternion_test.cpp +++ b/indra/llmath/tests/llquaternion_test.cpp @@ -29,12 +29,12 @@  #include "linden_common.h"  #include "../test/lltut.h" -#include "../llquaternion.h"  #include "../v4math.h"  #include "../v3math.h"  #include "../v3dmath.h"  #include "../m4math.h"  #include "../m3math.h" +#include "../llquaternion.h"  namespace tut  { diff --git a/indra/llmath/tests/m3math_test.cpp b/indra/llmath/tests/m3math_test.cpp index 8abf61b740..1ca2b005d9 100644 --- 
a/indra/llmath/tests/m3math_test.cpp +++ b/indra/llmath/tests/m3math_test.cpp @@ -37,6 +37,16 @@  #include "../test/lltut.h" +#if LL_WINDOWS +// disable unreachable code warnings caused by usage of skip. +#pragma warning(disable: 4702) +#endif + +#if LL_WINDOWS +// disable unreachable code warnings caused by usage of skip. +#pragma warning(disable: 4702) +#endif +  namespace tut  {  	struct m3math_test @@ -277,20 +287,21 @@ namespace tut  		LLVector3 llvec2(1, 2, 0);  		LLVector3 llvec3(2, 4, 2); +        skip("This test fails depending on architecture. Need to fix comparison operation, is_approx_equal, to work on more than one platform."); +  		llmat_obj.setRows(llvec1, llvec2, llvec3);  		llmat_obj.orthogonalize(); -		skip("Grr, LLMatrix3::orthogonalize test is failing.  Has it ever worked?");  		ensure("LLMatrix3::orthogonalize failed ", -		       is_approx_equal(0.19611613f, llmat_obj.mMatrix[0][0]) && +		       is_approx_equal(0.19611614f, llmat_obj.mMatrix[0][0]) &&  		       is_approx_equal(0.78446454f, llmat_obj.mMatrix[0][1]) && -		       is_approx_equal(0.58834839f, llmat_obj.mMatrix[0][2]) && -		       is_approx_equal(0.47628206f, llmat_obj.mMatrix[1][0]) && -		       is_approx_equal(0.44826555f, llmat_obj.mMatrix[1][1]) && -		       is_approx_equal(-0.75644791f, llmat_obj.mMatrix[1][2]) && -		       is_approx_equal(-0.85714287f, llmat_obj.mMatrix[2][0]) && +		       is_approx_equal(0.58834841f, llmat_obj.mMatrix[0][2]) && +		       is_approx_equal(0.47628204f, llmat_obj.mMatrix[1][0]) && +		       is_approx_equal(0.44826545f, llmat_obj.mMatrix[1][1]) && +		       is_approx_equal(-0.75644795f, llmat_obj.mMatrix[1][2]) && +		       is_approx_equal(-0.85714286f, llmat_obj.mMatrix[2][0]) &&  		       is_approx_equal(0.42857143f, llmat_obj.mMatrix[2][1]) && -		       is_approx_equal(-0.28571427f, llmat_obj.mMatrix[2][2])); +		       is_approx_equal(-0.28571429f, llmat_obj.mMatrix[2][2]));  	}  	//test case for adjointTranspose() fn. 
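The orthogonalize() test above remains skipped because it pins exact expected matrix entries, and the last bit of each value depends on the platform's rounding. A sturdier formulation checks the properties Gram-Schmidt actually guarantees, unit-length and mutually perpendicular rows, against an explicit tolerance. The sketch below is a hypothetical alternative test body (not part of the patch) using arbitrary non-degenerate input rows:

#include "linden_common.h"
#include "m3math.h"
#include "v3math.h"
#include <cmath>

// Hypothetical tolerance-based variant of the skipped orthogonalize() test; not in the patch.
void check_orthogonalize()
{
    LLMatrix3 m;
    m.setRows(LLVector3(1.f, 4.f, 3.f),
              LLVector3(1.f, 2.f, 0.f),
              LLVector3(2.f, 4.f, 2.f));
    m.orthogonalize();

    const LLVector3 row0(m.mMatrix[0]);
    const LLVector3 row1(m.mMatrix[1]);
    const LLVector3 row2(m.mMatrix[2]);
    const F32 eps = 1e-5f;

    llassert(fabsf(row0.magVec() - 1.f) < eps);   // rows come out unit length
    llassert(fabsf(row1.magVec() - 1.f) < eps);
    llassert(fabsf(row2.magVec() - 1.f) < eps);
    llassert(fabsf(row0 * row1) < eps);           // and mutually orthogonal (dot products ~ 0)
    llassert(fabsf(row0 * row2) < eps);
    llassert(fabsf(row1 * row2) < eps);
}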
diff --git a/indra/llmath/tests/v2math_test.cpp b/indra/llmath/tests/v2math_test.cpp index 9747996b25..4d6a2eca93 100644 --- a/indra/llmath/tests/v2math_test.cpp +++ b/indra/llmath/tests/v2math_test.cpp @@ -85,7 +85,7 @@ namespace tut  		F32 x = 2.2345f, y = 3.5678f ;  		LLVector2 vec2(x,y);  		ensure("magVecSquared:Fail ", is_approx_equal(vec2.magVecSquared(), (x*x + y*y))); -		ensure("magVec:Fail ", is_approx_equal(vec2.magVec(), fsqrtf(x*x + y*y))); +		ensure("magVec:Fail ", is_approx_equal(vec2.magVec(), (F32) sqrt(x*x + y*y)));  	}  	template<> template<> @@ -407,7 +407,7 @@ namespace tut  		ensure_equals("dist_vec_squared values are not equal",val2, val1);  		val1 = 	dist_vec(vec2, vec3); -		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2)); +		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2));  		ensure_equals("dist_vec values are not equal",val2, val1);  	} @@ -431,7 +431,7 @@ namespace tut  		LLVector2 vec2(x1, y1);  		F32 vecMag = vec2.normVec(); -		F32 mag = fsqrtf(x1*x1 + y1*y1); +		F32 mag = (F32) sqrt(x1*x1 + y1*y1);  		F32 oomag = 1.f / mag;  		val1 = x1 * oomag; diff --git a/indra/llmath/tests/v3color_test.cpp b/indra/llmath/tests/v3color_test.cpp index 2c00f00ab3..29d1c483ab 100644 --- a/indra/llmath/tests/v3color_test.cpp +++ b/indra/llmath/tests/v3color_test.cpp @@ -93,7 +93,7 @@ namespace tut  		F32 r = 2.3436212f, g = 1231.f, b = 4.7849321232f;  		LLColor3 llcolor3(r,g,b);  		ensure("magVecSquared:Fail ", is_approx_equal(llcolor3.magVecSquared(), (r*r + g*g + b*b))); -		ensure("magVec:Fail ", is_approx_equal(llcolor3.magVec(), fsqrtf(r*r + g*g + b*b))); +		ensure("magVec:Fail ", is_approx_equal(llcolor3.magVec(), (F32) sqrt(r*r + g*g + b*b)));  	}  	template<> template<> @@ -103,7 +103,7 @@ namespace tut  		F32 val1, val2,val3;  		LLColor3 llcolor3(r,g,b);  		F32 vecMag = llcolor3.normVec(); -		F32 mag = fsqrtf(r*r + g*g + b*b); +		F32 mag = (F32) sqrt(r*r + g*g + b*b);  		F32 oomag = 1.f / mag;  		val1 = r * oomag;  		val2 = g * oomag; @@ -286,7 +286,7 @@ namespace tut  		F32 r1 =1.f, g1 = 2.f,b1 = 1.2f, r2 = -2.3f, g2 = 1.11f, b2 = 1234.234f;  		LLColor3 llcolor3(r1,g1,b1),llcolor3a(r2,g2,b2);  		F32 val = distVec(llcolor3,llcolor3a); -		ensure("distVec failed ", is_approx_equal(fsqrtf((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val)); +		ensure("distVec failed ", is_approx_equal((F32) sqrt((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val));  		F32 val1 = distVec_squared(llcolor3,llcolor3a);  		ensure("distVec_squared failed ", is_approx_equal(((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val1)); diff --git a/indra/llmath/tests/v3dmath_test.cpp b/indra/llmath/tests/v3dmath_test.cpp index b67346f4e5..20b26faa12 100644 --- a/indra/llmath/tests/v3dmath_test.cpp +++ b/indra/llmath/tests/v3dmath_test.cpp @@ -30,11 +30,11 @@  #include "llsd.h"  #include "../test/lltut.h" -#include "../llquaternion.h"  #include "../m3math.h"  #include "../v4math.h"  #include "../v3dmath.h"  #include "../v3dmath.h" +#include "../llquaternion.h"  namespace tut  { @@ -403,7 +403,7 @@ namespace tut  		LLVector3d vec3D(x,y,z);  		F64 res = (x*x + y*y + z*z) - vec3D.magVecSquared();  		ensure("1:magVecSquared:Fail ", ((-F_APPROXIMATELY_ZERO <= res)&& (res <=F_APPROXIMATELY_ZERO))); -		res = fsqrtf(x*x + y*y + z*z) - vec3D.magVec(); +		res = (F32) sqrt(x*x + y*y + z*z) - vec3D.magVec();  		ensure("2:magVec: Fail ", ((-F_APPROXIMATELY_ZERO <= res)&& (res <=F_APPROXIMATELY_ZERO)));	  	} diff --git a/indra/llmath/tests/v3math_test.cpp 
b/indra/llmath/tests/v3math_test.cpp index e4732bf861..e4ae1c10ef 100644 --- a/indra/llmath/tests/v3math_test.cpp +++ b/indra/llmath/tests/v3math_test.cpp @@ -30,12 +30,12 @@  #include "../test/lltut.h"  #include "llsd.h" -#include "../llquaternion.h" -#include "../llquantize.h"  #include "../v3dmath.h"  #include "../m3math.h"  #include "../v4math.h"  #include "../v3math.h" +#include "../llquaternion.h" +#include "../llquantize.h"  namespace tut @@ -149,7 +149,7 @@ namespace tut  		F32 x = 2.32f, y = 1.212f, z = -.12f;  		LLVector3 vec3(x,y,z);		  		ensure("1:magVecSquared:Fail ", is_approx_equal(vec3.magVecSquared(), (x*x + y*y + z*z))); -		ensure("2:magVec:Fail ", is_approx_equal(vec3.magVec(), fsqrtf(x*x + y*y + z*z))); +		ensure("2:magVec:Fail ", is_approx_equal(vec3.magVec(), (F32) sqrt(x*x + y*y + z*z)));  	}  	template<> template<> @@ -509,7 +509,7 @@ namespace tut  		F32 val1,val2;  		LLVector3 vec3(x1,y1,z1),vec3a(x2,y2,z2);  		val1 = dist_vec(vec3,vec3a); -		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2)); +		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));  		ensure_equals("1:dist_vec: Fail ",val2, val1);  		val1 = dist_vec_squared(vec3,vec3a);  		val2 =((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2)); @@ -564,4 +564,22 @@ namespace tut  		z1 = U8_to_F32(F32_to_U8(z, lowerz, upperz), lowerz, upperz);  		ensure("2:quantize8: Fail ", is_approx_equal(x1, vec3a.mV[VX]) && is_approx_equal(y1, vec3a.mV[VY]) && is_approx_equal(z1, vec3a.mV[VZ]));  	} + +	template<> template<> +	void v3math_object::test<35>() +	{ +		LLSD sd = LLSD::emptyArray(); +		sd[0] = 1.f; + +		LLVector3 parsed_1(sd); +		ensure("1:LLSD parse: Fail ", is_approx_equal(parsed_1.mV[VX], 1.f) && is_approx_equal(parsed_1.mV[VY], 0.f) && is_approx_equal(parsed_1.mV[VZ], 0.f)); + +		sd[1] = 2.f; +		LLVector3 parsed_2(sd); +		ensure("2:LLSD parse: Fail ", is_approx_equal(parsed_2.mV[VX], 1.f) && is_approx_equal(parsed_2.mV[VY], 2.f) && is_approx_equal(parsed_2.mV[VZ], 0.f)); + +		sd[2] = 3.f; +		LLVector3 parsed_3(sd); +		ensure("3:LLSD parse: Fail ", is_approx_equal(parsed_3.mV[VX], 1.f) && is_approx_equal(parsed_3.mV[VY], 2.f) && is_approx_equal(parsed_3.mV[VZ], 3.f)); +	}  } diff --git a/indra/llmath/tests/v4color_test.cpp b/indra/llmath/tests/v4color_test.cpp index fbd43625d1..d7eec3c87f 100644 --- a/indra/llmath/tests/v4color_test.cpp +++ b/indra/llmath/tests/v4color_test.cpp @@ -155,7 +155,7 @@ namespace tut  		F32 r = 0x20, g = 0xFFFF, b = 0xFF;  		LLColor4 llcolor4(r,g,b);  		ensure("magVecSquared:Fail ", is_approx_equal(llcolor4.magVecSquared(), (r*r + g*g + b*b))); -		ensure("magVec:Fail ", is_approx_equal(llcolor4.magVec(), fsqrtf(r*r + g*g + b*b))); +		ensure("magVec:Fail ", is_approx_equal(llcolor4.magVec(), (F32) sqrt(r*r + g*g + b*b)));  	}  	template<> template<> @@ -164,7 +164,7 @@ namespace tut  		F32 r = 0x20, g = 0xFFFF, b = 0xFF;  		LLColor4 llcolor4(r,g,b);  		F32 vecMag = llcolor4.normVec(); -		F32 mag = fsqrtf(r*r + g*g + b*b); +		F32 mag = (F32) sqrt(r*r + g*g + b*b);  		F32 oomag = 1.f / mag;  		F32 val1 = r * oomag, val2 = g * oomag,	val3 = b * oomag;  		ensure("1:normVec failed ", (is_approx_equal(val1, llcolor4.mV[0]) && is_approx_equal(val2, llcolor4.mV[1]) && is_approx_equal(val3, llcolor4.mV[2]) && is_approx_equal(vecMag, mag))); diff --git a/indra/llmath/tests/v4coloru_test.cpp b/indra/llmath/tests/v4coloru_test.cpp index 6d84ba41ef..128f6f3564 100644 --- a/indra/llmath/tests/v4coloru_test.cpp +++ 
b/indra/llmath/tests/v4coloru_test.cpp @@ -135,7 +135,7 @@ namespace tut  		U8 r = 0x12, g = 0xFF, b = 0xAF;  		LLColor4U llcolor4u(r,g,b);  		ensure("magVecSquared:Fail ", is_approx_equal(llcolor4u.magVecSquared(), (F32)(r*r + g*g + b*b))); -		ensure("magVec:Fail ", is_approx_equal(llcolor4u.magVec(), fsqrtf(r*r + g*g + b*b))); +		ensure("magVec:Fail ", is_approx_equal(llcolor4u.magVec(), (F32) sqrt((F32) (r*r + g*g + b*b))));  	}  	template<> template<> diff --git a/indra/llmath/tests/v4math_test.cpp b/indra/llmath/tests/v4math_test.cpp index b1f934e555..191ac864df 100644 --- a/indra/llmath/tests/v4math_test.cpp +++ b/indra/llmath/tests/v4math_test.cpp @@ -30,9 +30,9 @@  #include "../test/lltut.h"  #include "llsd.h" -#include "../llquaternion.h"  #include "../m4math.h"  #include "../v4math.h" +#include "../llquaternion.h"  namespace tut  { @@ -96,7 +96,7 @@ namespace tut  	{  		F32 x = 10.f, y = -2.3f, z = -.023f;  		LLVector4 vec4(x,y,z); -		ensure("magVec:Fail ", is_approx_equal(vec4.magVec(), fsqrtf(x*x + y*y + z*z))); +		ensure("magVec:Fail ", is_approx_equal(vec4.magVec(), (F32) sqrt(x*x + y*y + z*z)));  		ensure("magVecSquared:Fail ", is_approx_equal(vec4.magVecSquared(), (x*x + y*y + z*z)));  	} @@ -337,7 +337,7 @@ namespace tut  		F32 val1,val2;  		LLVector4 vec4(x1,y1,z1),vec4a(x2,y2,z2);  		val1 = dist_vec(vec4,vec4a); -		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2)); +		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));  		ensure_equals("dist_vec: Fail ",val2, val1);  		val1 = dist_vec_squared(vec4,vec4a);  		val2 =((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2)); diff --git a/indra/llmath/v2math.cpp b/indra/llmath/v2math.cpp index 0180049b5d..a0cd642853 100644 --- a/indra/llmath/v2math.cpp +++ b/indra/llmath/v2math.cpp @@ -86,7 +86,7 @@ F32	dist_vec(const LLVector2 &a, const LLVector2 &b)  {  	F32 x = a.mV[0] - b.mV[0];  	F32 y = a.mV[1] - b.mV[1]; -	return fsqrtf( x*x + y*y ); +	return (F32) sqrt( x*x + y*y );  }  F32	dist_vec_squared(const LLVector2 &a, const LLVector2 &b) @@ -109,3 +109,18 @@ LLVector2 lerp(const LLVector2 &a, const LLVector2 &b, F32 u)  		a.mV[VX] + (b.mV[VX] - a.mV[VX]) * u,  		a.mV[VY] + (b.mV[VY] - a.mV[VY]) * u );  } + +LLSD LLVector2::getValue() const +{ +	LLSD ret; +	ret[0] = mV[0]; +	ret[1] = mV[1]; +	return ret; +} + +void LLVector2::setValue(LLSD& sd) +{ +	mV[0] = (F32) sd[0].asReal(); +	mV[1] = (F32) sd[1].asReal(); +} + diff --git a/indra/llmath/v2math.h b/indra/llmath/v2math.h index f50a5e6633..8d5db96f5e 100644 --- a/indra/llmath/v2math.h +++ b/indra/llmath/v2math.h @@ -60,6 +60,9 @@ class LLVector2  		void	set(const LLVector2 &vec);	// Sets LLVector2 to vec  		void	set(const F32 *vec);			// Sets LLVector2 to vec +		LLSD	getValue() const; +		void	setValue(LLSD& sd); +  		void	setVec(F32 x, F32 y);	        // deprecated  		void	setVec(const LLVector2 &vec);	// deprecated  		void	setVec(const F32 *vec);			// deprecated @@ -216,7 +219,7 @@ inline void	LLVector2::setVec(const F32 *vec)  inline F32 LLVector2::length(void) const  { -	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]); +	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);  }  inline F32 LLVector2::lengthSquared(void) const @@ -226,7 +229,7 @@ inline F32 LLVector2::lengthSquared(void) const  inline F32		LLVector2::normalize(void)  { -	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]); +	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);  	F32 oomag;  	if (mag > FP_MAG_THRESHOLD) @@ -253,7 +256,7 @@ inline bool 
LLVector2::isFinite() const  // deprecated  inline F32		LLVector2::magVec(void) const  { -	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]); +	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);  }  // deprecated @@ -265,7 +268,7 @@ inline F32		LLVector2::magVecSquared(void) const  // deprecated  inline F32		LLVector2::normVec(void)  { -	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]); +	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);  	F32 oomag;  	if (mag > FP_MAG_THRESHOLD) diff --git a/indra/llmath/v3color.h b/indra/llmath/v3color.h index 327e452bf7..56cb2ae73e 100644 --- a/indra/llmath/v3color.h +++ b/indra/llmath/v3color.h @@ -278,7 +278,7 @@ inline F32		LLColor3::brightness(void) const  inline F32		LLColor3::length(void) const  { -	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); +	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);  }  inline F32		LLColor3::lengthSquared(void) const @@ -288,7 +288,7 @@ inline F32		LLColor3::lengthSquared(void) const  inline F32		LLColor3::normalize(void)  { -	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); +	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);  	F32 oomag;  	if (mag) @@ -304,7 +304,7 @@ inline F32		LLColor3::normalize(void)  // deprecated  inline F32		LLColor3::magVec(void) const  { -	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); +	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);  }  // deprecated @@ -316,7 +316,7 @@ inline F32		LLColor3::magVecSquared(void) const  // deprecated  inline F32		LLColor3::normVec(void)  { -	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); +	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);  	F32 oomag;  	if (mag) @@ -438,7 +438,7 @@ inline F32		distVec(const LLColor3 &a, const LLColor3 &b)  	F32 x = a.mV[0] - b.mV[0];  	F32 y = a.mV[1] - b.mV[1];  	F32 z = a.mV[2] - b.mV[2]; -	return fsqrtf( x*x + y*y + z*z ); +	return (F32) sqrt( x*x + y*y + z*z );  }  inline F32		distVec_squared(const LLColor3 &a, const LLColor3 &b) diff --git a/indra/llmath/v3dmath.h b/indra/llmath/v3dmath.h index 664c986ad0..578dcdc8ea 100644 --- a/indra/llmath/v3dmath.h +++ b/indra/llmath/v3dmath.h @@ -234,7 +234,7 @@ inline const LLVector3d&	LLVector3d::setVec(const F64 *vec)  inline F64 LLVector3d::normVec(void)  { -	F64 mag = fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]); +	F64 mag = (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);  	F64 oomag;  	if (mag > FP_MAG_THRESHOLD) @@ -256,7 +256,7 @@ inline F64 LLVector3d::normVec(void)  inline F64 LLVector3d::normalize(void)  { -	F64 mag = fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]); +	F64 mag = (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);  	F64 oomag;  	if (mag > FP_MAG_THRESHOLD) @@ -280,7 +280,7 @@ inline F64 LLVector3d::normalize(void)  inline F64	LLVector3d::magVec(void) const  { -	return fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]); +	return (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);  }  inline F64	LLVector3d::magVecSquared(void) const @@ -290,7 +290,7 @@ inline F64	LLVector3d::magVecSquared(void) const  inline F64	LLVector3d::length(void) const  { -	return fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]); +	return (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);  }  inline F64	LLVector3d::lengthSquared(void) const @@ -400,7 +400,7 @@ inline F64	dist_vec(const LLVector3d &a, const LLVector3d &b)  	F64 x = a.mdV[0] - b.mdV[0];  	F64 y = a.mdV[1] - b.mdV[1];  	F64 z = a.mdV[2] - b.mdV[2]; -	return fsqrtf( x*x + y*y + z*z ); +	return 
(F32) sqrt( x*x + y*y + z*z );  }  inline F64	dist_vec_squared(const LLVector3d &a, const LLVector3d &b) diff --git a/indra/llmath/v3math.cpp b/indra/llmath/v3math.cpp index fd08df02d8..e7107dee16 100644 --- a/indra/llmath/v3math.cpp +++ b/indra/llmath/v3math.cpp @@ -134,6 +134,21 @@ BOOL LLVector3::clampLength( F32 length_limit )  	return changed;  } +BOOL LLVector3::clamp(const LLVector3 &min_vec, const LLVector3 &max_vec) +{ +	BOOL ret = FALSE; + +	if (mV[0] < min_vec[0]) { mV[0] = min_vec[0]; ret = TRUE; } +	if (mV[1] < min_vec[1]) { mV[1] = min_vec[1]; ret = TRUE; } +	if (mV[2] < min_vec[2]) { mV[2] = min_vec[2]; ret = TRUE; } + +	if (mV[0] > max_vec[0]) { mV[0] = max_vec[0]; ret = TRUE; } +	if (mV[1] > max_vec[1]) { mV[1] = max_vec[1]; ret = TRUE; } +	if (mV[2] > max_vec[2]) { mV[2] = max_vec[2]; ret = TRUE; } + +	return ret; +} +  // Sets all values to absolute value of their original values  // Returns TRUE if data changed @@ -191,6 +206,28 @@ const LLVector3&	LLVector3::rotVec(const LLQuaternion &q)  	return *this;  } +const LLVector3& LLVector3::transVec(const LLMatrix4& mat) +{ +	setVec( +			mV[VX] * mat.mMatrix[VX][VX] +  +			mV[VY] * mat.mMatrix[VX][VY] +  +			mV[VZ] * mat.mMatrix[VX][VZ] + +			mat.mMatrix[VX][VW], +			  +			mV[VX] * mat.mMatrix[VY][VX] +  +			mV[VY] * mat.mMatrix[VY][VY] +  +			mV[VZ] * mat.mMatrix[VY][VZ] + +			mat.mMatrix[VY][VW], + +			mV[VX] * mat.mMatrix[VZ][VX] +  +			mV[VY] * mat.mMatrix[VZ][VY] +  +			mV[VZ] * mat.mMatrix[VZ][VZ] + +			mat.mMatrix[VZ][VW]); + +	return *this; +} + +  const LLVector3&	LLVector3::rotVec(F32 angle, const LLVector3 &vec)  {  	if ( !vec.isExactlyZero() && angle ) diff --git a/indra/llmath/v3math.h b/indra/llmath/v3math.h index dbd38c1c3f..0432aeba4c 100644 --- a/indra/llmath/v3math.h +++ b/indra/llmath/v3math.h @@ -34,6 +34,7 @@  class LLVector2;  class LLVector4;  class LLMatrix3; +class LLMatrix4;  class LLVector3d;  class LLQuaternion; @@ -69,6 +70,7 @@ class LLVector3  		inline BOOL isFinite() const;									// checks to see if all values of LLVector3 are finite  		BOOL		clamp(F32 min, F32 max);		// Clamps all values to (min,max), returns TRUE if data changed +		BOOL		clamp(const LLVector3 &min_vec, const LLVector3 &max_vec); // Scales vector by another vector  		BOOL		clampLength( F32 length_limit );					// Scales vector to limit length to a value  		void		quantize16(F32 lowerxy, F32 upperxy, F32 lowerz, F32 upperz);	// changes the vector to reflect quatization @@ -109,6 +111,7 @@ class LLVector3  		const LLVector3&	rotVec(F32 angle, F32 x, F32 y, F32 z);		// Rotates about x,y,z by angle radians  		const LLVector3&	rotVec(const LLMatrix3 &mat);				// Rotates by LLMatrix4 mat  		const LLVector3&	rotVec(const LLQuaternion &q);				// Rotates by LLQuaternion q +		const LLVector3&	transVec(const LLMatrix4& mat);				// Transforms by LLMatrix4 mat (mat * v)  		const LLVector3&	scaleVec(const LLVector3& vec);				// scales per component by vec  		LLVector3			scaledVec(const LLVector3& vec) const;			// get a copy of this vector scaled by vec @@ -156,6 +159,8 @@ F32	dist_vec(const LLVector3 &a, const LLVector3 &b);		// Returns distance betwe  F32	dist_vec_squared(const LLVector3 &a, const LLVector3 &b);// Returns distance squared between a and b  F32	dist_vec_squared2D(const LLVector3 &a, const LLVector3 &b);// Returns distance squared between a and b ignoring Z component  LLVector3 projected_vec(const LLVector3 &a, const LLVector3 &b); // Returns vector a projected on vector b +LLVector3 parallel_component(const LLVector3 &a, 
const LLVector3 &b); // Returns vector a projected on vector b (same as projected_vec) +LLVector3 orthogonal_component(const LLVector3 &a, const LLVector3 &b); // Returns component of vector a not parallel to vector b (same as projected_vec)  LLVector3 lerp(const LLVector3 &a, const LLVector3 &b, F32 u); // Returns a vector that is a linear interpolation between a and b  inline LLVector3::LLVector3(void) @@ -274,7 +279,7 @@ inline void	LLVector3::setVec(const F32 *vec)  inline F32 LLVector3::normalize(void)  { -	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); +	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);  	F32 oomag;  	if (mag > FP_MAG_THRESHOLD) @@ -297,7 +302,7 @@ inline F32 LLVector3::normalize(void)  // deprecated  inline F32 LLVector3::normVec(void)  { -	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); +	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);  	F32 oomag;  	if (mag > FP_MAG_THRESHOLD) @@ -321,7 +326,7 @@ inline F32 LLVector3::normVec(void)  inline F32	LLVector3::length(void) const  { -	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); +	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);  }  inline F32	LLVector3::lengthSquared(void) const @@ -331,7 +336,7 @@ inline F32	LLVector3::lengthSquared(void) const  inline F32	LLVector3::magVec(void) const  { -	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); +	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);  }  inline F32	LLVector3::magVecSquared(void) const @@ -465,7 +470,7 @@ inline F32	dist_vec(const LLVector3 &a, const LLVector3 &b)  	F32 x = a.mV[0] - b.mV[0];  	F32 y = a.mV[1] - b.mV[1];  	F32 z = a.mV[2] - b.mV[2]; -	return fsqrtf( x*x + y*y + z*z ); +	return (F32) sqrt( x*x + y*y + z*z );  }  inline F32	dist_vec_squared(const LLVector3 &a, const LLVector3 &b) @@ -490,6 +495,17 @@ inline LLVector3 projected_vec(const LLVector3 &a, const LLVector3 &b)  	return project_axis * (a * project_axis);  } +inline LLVector3 parallel_component(const LLVector3 &a, const LLVector3 &b) +{ +	return projected_vec(a, b); +} + +inline LLVector3 orthogonal_component(const LLVector3 &a, const LLVector3 &b) +{ +	return a - projected_vec(a, b); +} + +  inline LLVector3 lerp(const LLVector3 &a, const LLVector3 &b, F32 u)  {  	return LLVector3( @@ -523,6 +539,21 @@ inline void update_min_max(LLVector3& min, LLVector3& max, const LLVector3& pos)  	}  } +inline void update_min_max(LLVector3& min, LLVector3& max, const F32* pos) +{ +	for (U32 i = 0; i < 3; i++) +	{ +		if (min.mV[i] > pos[i]) +		{ +			min.mV[i] = pos[i]; +		} +		if (max.mV[i] < pos[i]) +		{ +			max.mV[i] = pos[i]; +		} +	} +} +  inline F32 angle_between(const LLVector3& a, const LLVector3& b)  {  	LLVector3 an = a; diff --git a/indra/llmath/v4color.h b/indra/llmath/v4color.h index 60d24e2e11..b047f86e6e 100644 --- a/indra/llmath/v4color.h +++ b/indra/llmath/v4color.h @@ -108,6 +108,7 @@ class LLColor4  	    const LLColor4& operator=(const LLColor3 &a);	// Assigns vec3 to vec4 and returns vec4 +		bool operator<(const LLColor4& rhs) const;  		friend std::ostream&	 operator<<(std::ostream& s, const LLColor4 &a);		// Print a  		friend LLColor4 operator+(const LLColor4 &a, const LLColor4 &b);	// Return vector a + b  		friend LLColor4 operator-(const LLColor4 &a, const LLColor4 &b);	// Return vector a minus b @@ -385,7 +386,7 @@ inline const LLColor4&	LLColor4::setAlpha(F32 a)  inline F32		LLColor4::length(void) const  { -	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); +	return (F32) 
sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);  }  inline F32		LLColor4::lengthSquared(void) const @@ -395,7 +396,7 @@ inline F32		LLColor4::lengthSquared(void) const  inline F32		LLColor4::normalize(void)  { -	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); +	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);  	F32 oomag;  	if (mag) @@ -411,7 +412,7 @@ inline F32		LLColor4::normalize(void)  // deprecated  inline F32		LLColor4::magVec(void) const  { -	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); +	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);  }  // deprecated @@ -423,7 +424,7 @@ inline F32		LLColor4::magVecSquared(void) const  // deprecated  inline F32		LLColor4::normVec(void)  { -	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); +	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);  	F32 oomag;  	if (mag) @@ -589,6 +590,23 @@ inline LLColor4 lerp(const LLColor4 &a, const LLColor4 &b, F32 u)  		a.mV[VW] + (b.mV[VW] - a.mV[VW]) * u);  } +inline bool LLColor4::operator<(const LLColor4& rhs) const +{ +	if (mV[0] != rhs.mV[0]) +	{ +		return mV[0] < rhs.mV[0]; +	} +	if (mV[1] != rhs.mV[1]) +	{ +		return mV[1] < rhs.mV[1]; +	} +	if (mV[2] != rhs.mV[2]) +	{ +		return mV[2] < rhs.mV[2]; +	} + +	return mV[3] < rhs.mV[3]; +}  void LLColor4::clamp()  { diff --git a/indra/llmath/v4coloru.h b/indra/llmath/v4coloru.h index 7471aebe02..12da7e2dd7 100644 --- a/indra/llmath/v4coloru.h +++ b/indra/llmath/v4coloru.h @@ -294,7 +294,7 @@ inline const LLColor4U&	LLColor4U::setAlpha(U8 a)  inline F32		LLColor4U::length(void) const  { -	return fsqrtf( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] ); +	return (F32) sqrt( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );  }  inline F32		LLColor4U::lengthSquared(void) const @@ -305,7 +305,7 @@ inline F32		LLColor4U::lengthSquared(void) const  // deprecated  inline F32		LLColor4U::magVec(void) const  { -	return fsqrtf( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] ); +	return (F32) sqrt( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );  }  // deprecated diff --git a/indra/llmath/v4math.h b/indra/llmath/v4math.h index e7028626f9..623c8b2003 100644 --- a/indra/llmath/v4math.h +++ b/indra/llmath/v4math.h @@ -315,7 +315,7 @@ inline void	LLVector4::setVec(const F32 *vec)  inline F32		LLVector4::length(void) const  { -	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); +	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);  }  inline F32		LLVector4::lengthSquared(void) const @@ -325,7 +325,7 @@ inline F32		LLVector4::lengthSquared(void) const  inline F32		LLVector4::magVec(void) const  { -	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); +	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);  }  inline F32		LLVector4::magVecSquared(void) const @@ -457,7 +457,7 @@ inline LLVector4 lerp(const LLVector4 &a, const LLVector4 &b, F32 u)  inline F32		LLVector4::normalize(void)  { -	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); +	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);  	F32 oomag;  	if (mag > FP_MAG_THRESHOLD) @@ -480,7 +480,7 @@ inline F32		LLVector4::normalize(void)  // deprecated  inline F32		LLVector4::normVec(void)  { -	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); +	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);  	F32 
oomag;  	if (mag > FP_MAG_THRESHOLD) diff --git a/indra/llmath/xform.h b/indra/llmath/xform.h index 5159c1cbfe..1b50749b3e 100644 --- a/indra/llmath/xform.h +++ b/indra/llmath/xform.h @@ -32,11 +32,11 @@  const F32 MAX_OBJECT_Z 		= 4096.f; // should match REGION_HEIGHT_METERS, Pre-havok4: 768.f  const F32 MIN_OBJECT_Z 		= -256.f; -const F32 DEFAULT_MAX_PRIM_SCALE = 10.f; +const F32 DEFAULT_MAX_PRIM_SCALE = 64.f; +const F32 DEFAULT_MAX_PRIM_SCALE_NO_MESH = 10.f;  const F32 MIN_PRIM_SCALE = 0.01f;  const F32 MAX_PRIM_SCALE = 65536.f;	// something very high but not near FLT_MAX -  class LLXform  {  protected: | 
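Taken together, the v3math.h hunks further up give LLVector3 a component-wise clamp against a min/max pair and a parallel/orthogonal decomposition relative to another vector. Note that the declaration comment on orthogonal_component() is a copy of the projected_vec() description; as the inline body shows, it actually returns a - projected_vec(a, b), i.e. the part of a perpendicular to b. A short usage sketch, hypothetical and not part of the change set:

#include "linden_common.h"
#include "v3math.h"

// Hypothetical usage of the new LLVector3 helpers; not in the patch.
void vector_helpers_demo()
{
    LLVector3 v(3.f, 4.f, 0.f);
    LLVector3 axis(1.f, 0.f, 0.f);

    LLVector3 para = parallel_component(v, axis);     // (3, 0, 0): the part of v along axis
    LLVector3 perp = orthogonal_component(v, axis);   // (0, 4, 0): v minus its projection
    llassert(para + perp == v);                       // exact for these axis-aligned values

    LLVector3 clamped(v);
    BOOL changed = clamped.clamp(LLVector3(-2.f, -2.f, -2.f), LLVector3(2.f, 2.f, 2.f));
    llassert(changed && clamped == LLVector3(2.f, 2.f, 0.f));   // x and y were pulled into range
}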
