diff options
Diffstat (limited to 'indra/llmath')
| -rw-r--r-- | indra/llmath/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | indra/llmath/llcamera.h | 12 | ||||
| -rw-r--r-- | indra/llmath/llmath.h | 6 | ||||
| -rw-r--r-- | indra/llmath/llmatrix3a.h | 2 | ||||
| -rw-r--r-- | indra/llmath/llmatrix4a.h | 2 | ||||
| -rw-r--r-- | indra/llmath/lloctree.h | 147 | ||||
| -rw-r--r-- | indra/llmath/llplane.h | 4 | ||||
| -rw-r--r-- | indra/llmath/llsimdmath.h | 3 | ||||
| -rw-r--r-- | indra/llmath/llsimdtypes.inl | 2 | ||||
| -rw-r--r-- | indra/llmath/llvector4a.cpp | 8 | ||||
| -rw-r--r-- | indra/llmath/llvector4a.h | 6 | ||||
| -rw-r--r-- | indra/llmath/llvector4a.inl | 1 | ||||
| -rw-r--r-- | indra/llmath/llvector4logical.h | 2 | ||||
| -rw-r--r-- | indra/llmath/llvolume.cpp | 341 | ||||
| -rw-r--r-- | indra/llmath/llvolume.h | 16 | ||||
| -rw-r--r-- | indra/llmath/llvolumemgr.cpp | 3 | ||||
| -rw-r--r-- | indra/llmath/llvolumeoctree.cpp | 6 | ||||
| -rw-r--r-- | indra/llmath/llvolumeoctree.h | 35 | ||||
| -rw-r--r-- | indra/llmath/tests/alignment_test.cpp | 128 | 
19 files changed, 331 insertions, 394 deletions
diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index b5e59c1ca3..5865ae030c 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -117,6 +117,7 @@ if (LL_TESTS)    # INTEGRATION TESTS    set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES})    # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests. +  LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}")    LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}")    LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}")    LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}") diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index ec67b91d05..0b591be622 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD;  // roll(), pitch(), yaw()  // etc... - +LL_ALIGN_PREFIX(16)  class LLCamera  : 	public LLCoordFrame  { @@ -108,7 +108,7 @@ public:  	};  private: -	LLPlane mAgentPlanes[7];  //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP +	LL_ALIGN_16(LLPlane mAgentPlanes[7]);  //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP  	U8 mPlaneMask[8];         // 8 for alignment	  	F32 mView;					// angle between top and bottom frustum planes in radians. @@ -116,13 +116,13 @@ private:  	S32 mViewHeightInPixels;	// for ViewHeightInPixels() only  	F32 mNearPlane;  	F32 mFarPlane; -	LLPlane mLocalPlanes[4]; +	LL_ALIGN_16(LLPlane mLocalPlanes[4]);  	F32 mFixedDistance;			// Always return this distance, unless < 0  	LLVector3 mFrustCenter;		// center of frustum and radius squared for ultra-quick exclusion test  	F32 mFrustRadiusSquared; -	LLPlane mWorldPlanes[PLANE_NUM]; -	LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; +	LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); +	LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]);  	U32 mPlaneCount;  //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -208,7 +208,7 @@ protected:  	void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom);  	void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2);  	void calculateWorldFrustumPlanes(); -}; +} LL_ALIGN_POSTFIX(16);  #endif diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h index 9297bcbac2..b93f89d674 100644 --- a/indra/llmath/llmath.h +++ b/indra/llmath/llmath.h @@ -85,7 +85,7 @@ const F32	F_ALMOST_ONE	= 1.0f - F_ALMOST_ZERO;  const F32 FP_MAG_THRESHOLD = 0.0000001f;  // TODO: Replace with logic like is_approx_equal -inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); } +inline bool is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); }  // These functions work by interpreting sign+exp+mantissa as an unsigned  // integer. @@ -111,13 +111,13 @@ inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f <  // WARNING: Infinity is comparable with F32_MAX and negative   // infinity is comparable with F32_MIN -inline BOOL is_approx_equal(F32 x, F32 y) +inline bool is_approx_equal(F32 x, F32 y)  {  	const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02;  	return (std::abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);  } -inline BOOL is_approx_equal(F64 x, F64 y) +inline bool is_approx_equal(F64 x, F64 y)  {  	const S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02;  	return (std::abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT); diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index adb7e3389d..9916cfd2da 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public:  protected: -	LLVector4a mColumns[3]; +	LL_ALIGN_16(LLVector4a mColumns[3]);  }; diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 27cf5b79f6..c4cefdb4fa 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@  class LLMatrix4a  {  public: -	LLVector4a mMatrix[4]; +	LL_ALIGN_16(LLVector4a mMatrix[4]);  	inline void clear()  	{ diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 1b11e83b4a..c3f6f7de2a 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -31,7 +31,6 @@  #include "v3math.h"  #include "llvector4a.h"  #include <vector> -#include <set>  #define OCT_ERRS LL_WARNS("OctreeErrors") @@ -79,16 +78,18 @@ public:  	typedef LLOctreeTraveler<T>									oct_traveler;  	typedef LLTreeTraveler<T>									tree_traveler; -	typedef typename std::set<LLPointer<T> >					element_list; -	typedef typename element_list::iterator						element_iter; -	typedef typename element_list::const_iterator	const_element_iter; +	typedef LLPointer<T>*										element_list; +	typedef LLPointer<T>*										element_iter; +	typedef const LLPointer<T>*									const_element_iter;  	typedef typename std::vector<LLTreeListener<T>*>::iterator	tree_listener_iter; -	typedef typename std::vector<LLOctreeNode<T>* >				child_list; +	typedef LLOctreeNode<T>**									child_list; +	typedef LLOctreeNode<T>**									child_iter; +  	typedef LLTreeNode<T>		BaseType;  	typedef LLOctreeNode<T>		oct_node;  	typedef LLOctreeListener<T>	oct_listener; -	/*void* operator new(size_t size) +	void* operator new(size_t size)  	{  		return ll_aligned_malloc_16(size);  	} @@ -96,7 +97,7 @@ public:  	void operator delete(void* ptr)  	{  		ll_aligned_free_16(ptr); -	}*/ +	}  	LLOctreeNode(	const LLVector4a& center,   					const LLVector4a& size,  @@ -105,6 +106,9 @@ public:  	:	mParent((oct_node*)parent),   		mOctant(octant)   	{  +		mData = NULL; +		mDataEnd = NULL; +  		mCenter = center;  		mSize = size; @@ -123,6 +127,16 @@ public:  	{   		BaseType::destroyListeners();  +		for (U32 i = 0; i < mElementCount; ++i) +		{ +			mData[i]->setBinIndex(-1); +			mData[i] = NULL; +		} + +		free(mData); +		mData = NULL; +		mDataEnd = NULL; +  		for (U32 i = 0; i < getChildCount(); i++)  		{  			delete getChild(i); @@ -219,12 +233,17 @@ public:  	}  	void accept(oct_traveler* visitor)				{ visitor->visit(this); } -	virtual bool isLeaf() const						{ return mChild.empty(); } +	virtual bool isLeaf() const						{ return mChildCount == 0; }  	U32 getElementCount() const						{ return mElementCount; } +	bool isEmpty() const							{ return mElementCount == 0; }  	element_list& getData()							{ return mData; }  	const element_list& getData() const				{ return mData; } -	 +	element_iter getDataBegin()						{ return mData; } +	element_iter getDataEnd()						{ return mDataEnd; } +	const_element_iter getDataBegin() const			{ return mData; } +	const_element_iter getDataEnd() const			{ return mDataEnd; } +		  	U32 getChildCount()	const						{ return mChildCount; }  	oct_node* getChild(U32 index)					{ return mChild[index]; }  	const oct_node* getChild(U32 index) const		{ return mChild[index]; } @@ -289,7 +308,7 @@ public:  	virtual bool insert(T* data)  	{ -		if (data == NULL) +		if (data == NULL || data->getBinIndex() != -1)  		{  			OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl;  			return false; @@ -302,13 +321,16 @@ public:  			if ((getElementCount() < gOctreeMaxCapacity && contains(data->getBinRadius()) ||  				(data->getBinRadius() > getSize()[0] &&	parent && parent->getElementCount() >= gOctreeMaxCapacity)))   			{ //it belongs here -				//if this is a redundant insertion, error out (should never happen) -				llassert(mData.find(data) == mData.end()); +				mElementCount++; +				mData = (element_list) realloc(mData, sizeof(LLPointer<T>)*mElementCount); -				mData.insert(data); -				BaseType::insert(data); +				//avoid unref on uninitialized memory +				memset(mData+mElementCount-1, 0, sizeof(LLPointer<T>)); -				mElementCount = mData.size(); +				mData[mElementCount-1] = data; +				mDataEnd = mData + mElementCount; +				data->setBinIndex(mElementCount-1); +				BaseType::insert(data);  				return true;  			}  			else @@ -342,10 +364,16 @@ public:  				if( lt == 0x7 )  				{ -					mData.insert(data); -					BaseType::insert(data); +					mElementCount++; +					mData = (element_list) realloc(mData, sizeof(LLPointer<T>)*mElementCount); + +					//avoid unref on uninitialized memory +					memset(mData+mElementCount-1, 0, sizeof(LLPointer<T>)); -					mElementCount = mData.size(); +					mData[mElementCount-1] = data; +					mDataEnd = mData + mElementCount; +					data->setBinIndex(mElementCount-1); +					BaseType::insert(data);  					return true;  				} @@ -394,23 +422,59 @@ public:  		return false;  	} +	void _remove(T* data, S32 i) +	{ //precondition -- mElementCount > 0, idx is in range [0, mElementCount) + +		mElementCount--; +		data->setBinIndex(-1);  +		 +		if (mElementCount > 0) +		{ +			if (mElementCount != i) +			{ +				mData[i] = mData[mElementCount]; //might unref data, do not access data after this point +				mData[i]->setBinIndex(i); +			} + +			mData[mElementCount] = NULL; //needed for unref +			mData = (element_list) realloc(mData, sizeof(LLPointer<T>)*mElementCount); +			mDataEnd = mData+mElementCount; +		} +		else +		{ +			mData[0] = NULL; //needed for unref +			free(mData); +			mData = NULL; +			mDataEnd = NULL; +		} + +		notifyRemoval(data); +		checkAlive(); +	} +  	bool remove(T* data)  	{ -		if (mData.find(data) != mData.end()) -		{	//we have data -			mData.erase(data); -			mElementCount = mData.size(); -			notifyRemoval(data); -			checkAlive(); -			return true; -		} -		else if (isInside(data)) +		S32 i = data->getBinIndex(); + +		if (i >= 0 && i < mElementCount) +		{ +			if (mData[i] == data) +			{ //found it +				_remove(data, i); +				llassert(data->getBinIndex() == -1); +				return true; +			} +		} +		 +		if (isInside(data))  		{  			oct_node* dest = getNodeAt(data);  			if (dest != this)  			{ -				return dest->remove(data); +				bool ret = dest->remove(data); +				llassert(data->getBinIndex() == -1); +				return ret;  			}  		} @@ -429,19 +493,20 @@ public:  		//node is now root  		llwarns << "!!! OCTREE REMOVING FACE BY ADDRESS, SEVERE PERFORMANCE PENALTY |||" << llendl;  		node->removeByAddress(data); +		llassert(data->getBinIndex() == -1);  		return true;  	}  	void removeByAddress(T* data)  	{ -        if (mData.find(data) != mData.end()) +        for (U32 i = 0; i < mElementCount; ++i)  		{ -			mData.erase(data); -			mElementCount = mData.size(); -			notifyRemoval(data); -			llwarns << "FOUND!" << llendl; -			checkAlive(); -			return; +			if (mData[i] == data) +			{ //we have data +				_remove(data, i); +				llwarns << "FOUND!" << llendl; +				return; +			}  		}  		for (U32 i = 0; i < getChildCount(); i++) @@ -453,8 +518,8 @@ public:  	void clearChildren()  	{ -		mChild.clear();  		mChildCount = 0; +  		U32* foo = (U32*) mChildMap;  		foo[0] = foo[1] = 0xFFFFFFFF;  	} @@ -516,7 +581,7 @@ public:  		mChildMap[child->getOctant()] = mChildCount; -		mChild.push_back(child); +		mChild[mChildCount] = child;  		++mChildCount;  		child->setParent(this); @@ -543,9 +608,12 @@ public:  			mChild[index]->destroy();  			delete mChild[index];  		} -		mChild.erase(mChild.begin() + index); +  		--mChildCount; +		mChild[index] = mChild[mChildCount]; +		 +  		//rebuild child map  		U32* foo = (U32*) mChildMap;  		foo[0] = foo[1] = 0xFFFFFFFF; @@ -601,11 +669,12 @@ protected:  	oct_node* mParent;  	U8 mOctant; -	child_list mChild; +	LLOctreeNode<T>* mChild[8];  	U8 mChildMap[8];  	U32 mChildCount;  	element_list mData; +	element_iter mDataEnd;  	U32 mElementCount;  };  diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index a611894721..3c32441b11 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,6 +36,8 @@  // The plane normal = [A, B, C]  // The closest approach = D / sqrt(A*A + B*B + C*C) + +LL_ALIGN_PREFIX(16)  class LLPlane  {  public: @@ -94,7 +96,7 @@ public:  private:  	LLVector4a mV; -}; +} LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index c7cdf7b32c..01458521ec 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -67,11 +67,10 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)  #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - -  #include <xmmintrin.h>  #include <emmintrin.h> +#include "llmemory.h"  #include "llsimdtypes.h"  #include "llsimdtypes.inl" diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl index 712239e425..e905c84954 100644 --- a/indra/llmath/llsimdtypes.inl +++ b/indra/llmath/llsimdtypes.inl @@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b)  inline LLSimdScalar operator-(const LLSimdScalar& a)  {  	static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +	ll_assert_aligned(signMask,16);  	return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a);  } @@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs)  inline LLSimdScalar LLSimdScalar::getAbs() const  {  	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; +	ll_assert_aligned(F_ABS_MASK_4A,16);  	return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A));  } diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b66b7a7076..6edeb0fefe 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,6 +24,7 @@   * $/LicenseInfo$   */ +#include "llmemory.h"  #include "llmath.h"  #include "llquantize.h" @@ -44,7 +45,10 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F  	assert(dst != NULL);  	assert(bytes > 0);  	assert((bytes % sizeof(F32))== 0);  -	 +	ll_assert_aligned(src,16); +	ll_assert_aligned(dst,16); +	assert(bytes%16==0); +  	F32* end = dst + (bytes / sizeof(F32) );  	if (bytes > 64) @@ -189,6 +193,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high )  		LLVector4a oneOverDelta;  		{  			static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; +			ll_assert_aligned(F_TWO_4A,16); +			  			LLVector4a two; two.load4a( F_TWO_4A );  			// Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509d..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation;  #include <assert.h>  #include "llpreprocessor.h" +#include "llmemory.h"  ///////////////////////////////////  // FIRST TIME USERS PLEASE READ @@ -46,6 +47,7 @@ class LLRotation;  // LLVector3/LLVector4.   ///////////////////////////////// +LL_ALIGN_PREFIX(16)  class LLVector4a  {  public: @@ -82,6 +84,7 @@ public:  	}  	// Copy words 16-byte blocks from src to dst. Source and destination must not overlap.  +	// Source and dest must be 16-byte aligned and size must be multiple of 16.  	static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes);  	//////////////////////////////////// @@ -90,6 +93,7 @@ public:  	LLVector4a()  	{ //DO NOT INITIALIZE -- The overhead is completely unnecessary +		ll_assert_aligned(this,16);  	}  	LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) @@ -313,7 +317,7 @@ public:  private:  	LLQuad mQ; -}; +} LL_ALIGN_POSTFIX(16);  inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p)  { diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7ad22a5631..7c52ffef21 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -475,6 +475,7 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3  inline LLBool32 LLVector4a::isFinite3() const  {  	static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; +	ll_assert_aligned(nanOrInfMask,16);  	const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask);  	const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV );  	const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index dd66b09d43..c5698f7cea 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,6 +27,7 @@  #ifndef	LL_VECTOR4LOGICAL_H  #define	LL_VECTOR4LOGICAL_H +#include "llmemory.h"  ////////////////////////////  // LLVector4Logical @@ -77,6 +78,7 @@ public:  	inline LLVector4Logical& invert()  	{  		static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; +		ll_assert_aligned(allOnes,16);  		mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) );  		return *this;  	} diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cc9744756f..c85e1b1fb3 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -35,7 +35,6 @@  #include <cmath>  #include "llerror.h" -#include "llmemtype.h"  #include "llvolumemgr.h"  #include "v2math.h" @@ -95,17 +94,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1;  extern BOOL gDebugGL; -void assert_aligned(void* ptr, uintptr_t alignment) -{ -#if 0 -	uintptr_t t = (uintptr_t) ptr; -	if (t%alignment != 0) -	{ -		llerrs << "Alignment check failed." << llendl; -	} -#endif -} -  BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)  {      	LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -328,16 +316,16 @@ public:  		LLVector4a& min = node->mExtents[0];  		LLVector4a& max = node->mExtents[1]; -		if (!branch->getData().empty()) +		if (!branch->isEmpty())  		{ //node has data, find AABB that binds data set -			const LLVolumeTriangle* tri = *(branch->getData().begin()); +			const LLVolumeTriangle* tri = *(branch->getDataBegin());  			//initialize min/max to first available vertex  			min = *(tri->mV[0]);  			max = *(tri->mV[0]);  			for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter =  -				branch->getData().begin(); iter != branch->getData().end(); ++iter) +				branch->getDataBegin(); iter != branch->getDataEnd(); ++iter)  			{ //for each triangle in node  				//stretch by triangles in node @@ -352,7 +340,7 @@ public:  				max.setMax(max, *tri->mV[2]);  			}  		} -		else if (!branch->getChildren().empty()) +		else if (!branch->isLeaf())  		{ //no data, but child nodes exist  			LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0); @@ -389,8 +377,6 @@ public:  LLProfile::Face* LLProfile::addCap(S16 faceID)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	Face *face   = vector_append(mFaces, 1);  	face->mIndex = 0; @@ -403,8 +389,6 @@ LLProfile::Face* LLProfile::addCap(S16 faceID)  LLProfile::Face* LLProfile::addFace(S32 i, S32 count, F32 scaleU, S16 faceID, BOOL flat)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	Face *face   = vector_append(mFaces, 1);  	face->mIndex = i; @@ -420,7 +404,6 @@ LLProfile::Face* LLProfile::addFace(S32 i, S32 count, F32 scaleU, S16 faceID, BO  //static  S32 LLProfile::getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset, F32 bevel, F32 ang_scale, S32 split)  { // this is basically LLProfile::genNGon stripped down to only the operations that influence the number of points -	LLMemType m1(LLMemType::MTYPE_VOLUME);  	S32 np = 0;  	// Generate an n-sided "circular" path. @@ -486,8 +469,6 @@ S32 LLProfile::getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 of  // filleted and chamfered corners  void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F32 bevel, F32 ang_scale, S32 split)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	// Generate an n-sided "circular" path.  	// 0 is (1,0), and we go counter-clockwise along a circular path from there.  	const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f }; @@ -741,8 +722,6 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3  S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split,  						 BOOL is_sculpted, S32 sculpt_size)  { // this is basically LLProfile::generate stripped down to only operations that influence the number of points -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	if (detail < MIN_LOD)  	{  		detail = MIN_LOD; @@ -853,8 +832,6 @@ S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 de  BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split,  						 BOOL is_sculpted, S32 sculpt_size)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	if ((!mDirty) && (!is_sculpted))  	{  		return FALSE; @@ -1127,8 +1104,6 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai  BOOL LLProfileParams::importFile(LLFILE *fp)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	const S32 BUFSIZE = 16384;  	char buffer[BUFSIZE];	/* Flawfinder: ignore */  	// *NOTE: changing the size or type of these buffers will require @@ -1204,8 +1179,6 @@ BOOL LLProfileParams::exportFile(LLFILE *fp) const  BOOL LLProfileParams::importLegacyStream(std::istream& input_stream)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	const S32 BUFSIZE = 16384;  	char buffer[BUFSIZE];	/* Flawfinder: ignore */  	// *NOTE: changing the size or type of these buffers will require @@ -1297,7 +1270,6 @@ bool LLProfileParams::fromLLSD(LLSD& sd)  void LLProfileParams::copyParams(const LLProfileParams ¶ms)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME);  	setCurveType(params.getCurveType());  	setBegin(params.getBegin());  	setEnd(params.getEnd()); @@ -1514,8 +1486,6 @@ const LLVector2 LLPathParams::getEndScale() const  S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail)  { // this is basically LLPath::generate stripped down to only the operations that influence the number of points -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	if (detail < MIN_LOD)  	{  		detail = MIN_LOD; @@ -1565,8 +1535,6 @@ S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail)  BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,  					  BOOL is_sculpted, S32 sculpt_size)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	if ((!mDirty) && (!is_sculpted))  	{  		return FALSE; @@ -1694,8 +1662,6 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,  BOOL LLDynamicPath::generate(const LLPathParams& params, F32 detail, S32 split,  							 BOOL is_sculpted, S32 sculpt_size)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	mOpen = TRUE; // Draw end caps  	if (getPathLength() == 0)  	{ @@ -1717,8 +1683,6 @@ BOOL LLDynamicPath::generate(const LLPathParams& params, F32 detail, S32 split,  BOOL LLPathParams::importFile(LLFILE *fp)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	const S32 BUFSIZE = 16384;  	char buffer[BUFSIZE];	/* Flawfinder: ignore */  	// *NOTE: changing the size or type of these buffers will require @@ -1863,8 +1827,6 @@ BOOL LLPathParams::exportFile(LLFILE *fp) const  BOOL LLPathParams::importLegacyStream(std::istream& input_stream)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	const S32 BUFSIZE = 16384;  	char buffer[BUFSIZE];	/* Flawfinder: ignore */  	// *NOTE: changing the size or type of these buffers will require @@ -2072,8 +2034,6 @@ S32 LLVolume::sNumMeshPoints = 0;  LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL generate_single_face, const BOOL is_unique)  	: mParams(params)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	mUnique = is_unique;  	mFaceMask = 0x0;  	mDetail = detail; @@ -2145,7 +2105,6 @@ LLVolume::~LLVolume()  BOOL LLVolume::generate()  { -	LLMemType m1(LLMemType::MTYPE_VOLUME);  	llassert_always(mProfilep);  	//Added 10.03.05 Dave Parks @@ -2741,8 +2700,6 @@ S32	LLVolume::getNumFaces() const  void LLVolume::createVolumeFaces()  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -  	if (mGenerateSingleFace)  	{  		// do nothing @@ -2914,8 +2871,6 @@ F32 LLVolume::sculptGetSurfaceArea()  // create placeholder shape  void LLVolume::sculptGeneratePlaceholder()  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	S32 sizeS = mPathp->mPath.size();  	S32 sizeT = mProfilep->mProfile.size(); @@ -2952,9 +2907,6 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8  	BOOL sculpt_mirror = sculpt_type & LL_SCULPT_FLAG_MIRROR;  	BOOL reverse_horizontal = (sculpt_invert ? !sculpt_mirror : sculpt_mirror);  // XOR -	 -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	S32 sizeS = mPathp->mPath.size();  	S32 sizeT = mProfilep->mProfile.size(); @@ -3103,7 +3055,6 @@ void sculpt_calc_mesh_resolution(U16 width, U16 height, U8 type, F32 detail, S32  // sculpt replaces generate() for sculpted surfaces  void LLVolume::sculpt(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data, S32 sculpt_level)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME);      U8 sculpt_type = mParams.getSculptType();  	BOOL data_is_empty = FALSE; @@ -3240,7 +3191,6 @@ bool LLVolumeParams::operator<(const LLVolumeParams ¶ms) const  void LLVolumeParams::copyParams(const LLVolumeParams ¶ms)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME);  	mProfileParams.copyParams(params.mProfileParams);  	mPathParams.copyParams(params.mPathParams);  	mSculptID = params.getSculptID(); @@ -3612,8 +3562,6 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h  S32 *LLVolume::getTriangleIndices(U32 &num_indices) const  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	S32 expected_num_triangle_indices = getNumTriangleIndices();  	if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES)  	{ @@ -4341,8 +4289,6 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  										  const LLMatrix3& norm_mat_in,  										  S32 face_mask)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -  	LLMatrix4a mat;  	mat.loadu(mat_in); @@ -4804,241 +4750,8 @@ BOOL equalTriangle(const S32 *a, const S32 *b)  	return FALSE;  } -BOOL LLVolume::cleanupTriangleData( const S32 num_input_vertices, -									const std::vector<Point>& input_vertices, -									const S32 num_input_triangles, -									S32 *input_triangles, -									S32 &num_output_vertices, -									LLVector3 **output_vertices, -									S32 &num_output_triangles, -									S32 **output_triangles) -{ -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	 -	/* Testing: avoid any cleanup -	static BOOL skip_cleanup = TRUE; -	if ( skip_cleanup ) -	{ -		num_output_vertices = num_input_vertices; -		num_output_triangles = num_input_triangles; - -		*output_vertices = new LLVector3[num_input_vertices]; -		for (S32 index = 0; index < num_input_vertices; index++) -		{ -			(*output_vertices)[index] = input_vertices[index].mPos; -		} - -		*output_triangles = new S32[num_input_triangles*3]; -		memcpy(*output_triangles, input_triangles, 3*num_input_triangles*sizeof(S32));		// Flawfinder: ignore -		return TRUE; -	} -	*/ - -	// Here's how we do this: -	// Create a structure which contains the original vertex index and the -	// LLVector3 data. -	// "Sort" the data by the vectors -	// Create an array the size of the old vertex list, with a mapping of -	// old indices to new indices. -	// Go through triangles, shift so the lowest index is first -	// Sort triangles by first index -	// Remove duplicate triangles -	// Allocate and pack new triangle data. - -	//LLTimer cleanupTimer; -	//llinfos << "In vertices: " << num_input_vertices << llendl; -	//llinfos << "In triangles: " << num_input_triangles << llendl; - -	S32 i; -	typedef std::multiset<LLVertexIndexPair*, lessVertex> vertex_set_t; -	vertex_set_t vertex_list; - -	LLVertexIndexPair *pairp = NULL; -	for (i = 0; i < num_input_vertices; i++) -	{ -		LLVertexIndexPair *new_pairp = new LLVertexIndexPair(input_vertices[i].mPos, i); -		vertex_list.insert(new_pairp); -	} - -	// Generate the vertex mapping and the list of vertices without -	// duplicates.  This will crash if there are no vertices. -	llassert(num_input_vertices > 0); // check for no vertices! -	S32 *vertex_mapping = new S32[num_input_vertices]; -	LLVector3 *new_vertices = new LLVector3[num_input_vertices]; -	LLVertexIndexPair *prev_pairp = NULL; - -	S32 new_num_vertices; - -	new_num_vertices = 0; -	for (vertex_set_t::iterator iter = vertex_list.begin(), -			 end = vertex_list.end(); -		 iter != end; iter++) -	{ -		pairp = *iter; -		if (!prev_pairp || ((pairp->mVertex - prev_pairp->mVertex).magVecSquared() >= VERTEX_SLOP_SQRD))	 -		{ -			new_vertices[new_num_vertices] = pairp->mVertex; -			//llinfos << "Added vertex " << new_num_vertices << " : " << pairp->mVertex << llendl; -			new_num_vertices++; -			// Update the previous -			prev_pairp = pairp; -		} -		else -		{ -			//llinfos << "Removed duplicate vertex " << pairp->mVertex << ", distance magVecSquared() is " << (pairp->mVertex - prev_pairp->mVertex).magVecSquared() << llendl; -		} -		vertex_mapping[pairp->mIndex] = new_num_vertices - 1; -	} - -	// Iterate through triangles and remove degenerates, re-ordering vertices -	// along the way. -	S32 *new_triangles = new S32[num_input_triangles * 3]; -	S32 new_num_triangles = 0; - -	for (i = 0; i < num_input_triangles; i++) -	{ -		S32 v1 = i*3; -		S32 v2 = v1 + 1; -		S32 v3 = v1 + 2; - -		//llinfos << "Checking triangle " << input_triangles[v1] << ":" << input_triangles[v2] << ":" << input_triangles[v3] << llendl; -		input_triangles[v1] = vertex_mapping[input_triangles[v1]]; -		input_triangles[v2] = vertex_mapping[input_triangles[v2]]; -		input_triangles[v3] = vertex_mapping[input_triangles[v3]]; - -		if ((input_triangles[v1] == input_triangles[v2]) -			|| (input_triangles[v1] == input_triangles[v3]) -			|| (input_triangles[v2] == input_triangles[v3])) -		{ -			//llinfos << "Removing degenerate triangle " << input_triangles[v1] << ":" << input_triangles[v2] << ":" << input_triangles[v3] << llendl; -			// Degenerate triangle, skip -			continue; -		} - -		if (input_triangles[v1] < input_triangles[v2]) -		{ -			if (input_triangles[v1] < input_triangles[v3]) -			{ -				// (0 < 1) && (0 < 2) -				new_triangles[new_num_triangles*3] = input_triangles[v1]; -				new_triangles[new_num_triangles*3+1] = input_triangles[v2]; -				new_triangles[new_num_triangles*3+2] = input_triangles[v3]; -			} -			else -			{ -				// (0 < 1) && (2 < 0) -				new_triangles[new_num_triangles*3] = input_triangles[v3]; -				new_triangles[new_num_triangles*3+1] = input_triangles[v1]; -				new_triangles[new_num_triangles*3+2] = input_triangles[v2]; -			} -		} -		else if (input_triangles[v2] < input_triangles[v3]) -		{ -			// (1 < 0) && (1 < 2) -			new_triangles[new_num_triangles*3] = input_triangles[v2]; -			new_triangles[new_num_triangles*3+1] = input_triangles[v3]; -			new_triangles[new_num_triangles*3+2] = input_triangles[v1]; -		} -		else -		{ -			// (1 < 0) && (2 < 1) -			new_triangles[new_num_triangles*3] = input_triangles[v3]; -			new_triangles[new_num_triangles*3+1] = input_triangles[v1]; -			new_triangles[new_num_triangles*3+2] = input_triangles[v2]; -		} -		new_num_triangles++; -	} - -	if (new_num_triangles == 0) -	{ -		llwarns << "Created volume object with 0 faces." << llendl; -		delete[] new_triangles; -		delete[] vertex_mapping; -		delete[] new_vertices; -		return FALSE; -	} - -	typedef std::set<S32*, lessTriangle> triangle_set_t; -	triangle_set_t triangle_list; - -	for (i = 0; i < new_num_triangles; i++) -	{ -		triangle_list.insert(&new_triangles[i*3]); -	} - -	// Sort through the triangle list, and delete duplicates - -	S32 *prevp = NULL; -	S32 *curp = NULL; - -	S32 *sorted_tris = new S32[new_num_triangles*3]; -	S32 cur_tri = 0; -	for (triangle_set_t::iterator iter = triangle_list.begin(), -			 end = triangle_list.end(); -		 iter != end; iter++) -	{ -		curp = *iter; -		if (!prevp || !equalTriangle(prevp, curp)) -		{ -			//llinfos << "Added triangle " << *curp << ":" << *(curp+1) << ":" << *(curp+2) << llendl; -			sorted_tris[cur_tri*3] = *curp; -			sorted_tris[cur_tri*3+1] = *(curp+1); -			sorted_tris[cur_tri*3+2] = *(curp+2); -			cur_tri++; -			prevp = curp; -		} -		else -		{ -			//llinfos << "Skipped triangle " << *curp << ":" << *(curp+1) << ":" << *(curp+2) << llendl; -		} -	} - -	*output_vertices = new LLVector3[new_num_vertices]; -	num_output_vertices = new_num_vertices; -	for (i = 0; i < new_num_vertices; i++) -	{ -		(*output_vertices)[i] = new_vertices[i]; -	} - -	*output_triangles = new S32[cur_tri*3]; -	num_output_triangles = cur_tri; -	memcpy(*output_triangles, sorted_tris, 3*cur_tri*sizeof(S32));		/* Flawfinder: ignore */ - -	/* -	llinfos << "Out vertices: " << num_output_vertices << llendl; -	llinfos << "Out triangles: " << num_output_triangles << llendl; -	for (i = 0; i < num_output_vertices; i++) -	{ -		llinfos << i << ":" << (*output_vertices)[i] << llendl; -	} -	for (i = 0; i < num_output_triangles; i++) -	{ -		llinfos << i << ":" << (*output_triangles)[i*3] << ":" << (*output_triangles)[i*3+1] << ":" << (*output_triangles)[i*3+2] << llendl; -	} -	*/ - -	//llinfos << "Out vertices: " << num_output_vertices << llendl; -	//llinfos << "Out triangles: " << num_output_triangles << llendl; -	delete[] vertex_mapping; -	vertex_mapping = NULL; -	delete[] new_vertices; -	new_vertices = NULL; -	delete[] new_triangles; -	new_triangles = NULL; -	delete[] sorted_tris; -	sorted_tris = NULL; -	triangle_list.clear(); -	std::for_each(vertex_list.begin(), vertex_list.end(), DeletePointer()); -	vertex_list.clear(); -	 -	return TRUE; -} - -  BOOL LLVolumeParams::importFile(LLFILE *fp)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	//llinfos << "importing volume" << llendl;  	const S32 BUFSIZE = 16384;  	char buffer[BUFSIZE];	/* Flawfinder: ignore */ @@ -5093,8 +4806,6 @@ BOOL LLVolumeParams::exportFile(LLFILE *fp) const  BOOL LLVolumeParams::importLegacyStream(std::istream& input_stream)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	//llinfos << "importing volume" << llendl;  	const S32 BUFSIZE = 16384;  	// *NOTE: changing the size or type of this buffer will require @@ -5134,8 +4845,6 @@ BOOL LLVolumeParams::importLegacyStream(std::istream& input_stream)  BOOL LLVolumeParams::exportLegacyStream(std::ostream& output_stream) const  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	output_stream <<"\tshape 0\n";  	output_stream <<"\t{\n";  	mPathParams.exportLegacyStream(output_stream); @@ -6351,8 +6060,6 @@ void	LerpPlanarVertex(LLVolumeFace::VertexData& v0,  BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	const std::vector<LLVolume::Point>& mesh = volume->getMesh();  	const std::vector<LLVector3>& profile = volume->getProfile().mProfile;  	S32 max_s = volume->getProfile().getTotal(); @@ -6503,8 +6210,6 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)  BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	if (!(mTypeMask & HOLLOW_MASK) &&   		!(mTypeMask & OPEN_MASK) &&   		((volume->getParams().getPathParams().getBegin()==0.0f)&& @@ -6891,8 +6596,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  void LLVolumeFace::createBinormals()  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	if (!mBinormals)  	{  		allocateBinormals(mNumVertices); @@ -6962,14 +6665,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts)  	if (num_verts)  	{  		mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); -		assert_aligned(mPositions, 16); +		ll_assert_aligned(mPositions, 16);  		mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); -		assert_aligned(mNormals, 16); +		ll_assert_aligned(mNormals, 16);  		//pad texture coordinate block end to allow for QWORD reads  		S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;  		mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); -		assert_aligned(mTexCoords, 16); +		ll_assert_aligned(mTexCoords, 16);  	}  	else  	{ @@ -6993,14 +6696,17 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con  //	S32 old_size = mNumVertices*16;  	//positions -	mPositions = (LLVector4a*) realloc(mPositions, new_size); +	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size); +	ll_assert_aligned(mPositions,16);  	//normals -	mNormals = (LLVector4a*) realloc(mNormals, new_size); -	 +	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size); +	ll_assert_aligned(mNormals,16); +  	//tex coords  	new_size = ((new_verts*8)+0xF) & ~0xF; -	mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); +	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size); +	ll_assert_aligned(mTexCoords,16);  	//just clear binormals @@ -7053,7 +6759,8 @@ void LLVolumeFace::pushIndex(const U16& idx)  	S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;  	if (new_size != old_size)  	{ -		mIndices = (U16*) realloc(mIndices, new_size); +		mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size); +		ll_assert_aligned(mIndices,16);  	}  	mIndices[mNumIndices++] = idx; @@ -7094,12 +6801,12 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat  	}  	//allocate new buffer space -	mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); -	assert_aligned(mPositions, 16); -	mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); -	assert_aligned(mNormals, 16); -	mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); -	assert_aligned(mTexCoords, 16); +	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a)); +	ll_assert_aligned(mPositions, 16); +	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a)); +	ll_assert_aligned(mNormals, 16); +	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); +	ll_assert_aligned(mTexCoords, 16);  	mNumVertices = new_count; @@ -7145,7 +6852,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat  	new_count = mNumIndices + face.mNumIndices;  	//allocate new index buffer -	mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); +	mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);  	//get destination address into new index buffer  	U16* dst_idx = mIndices+mNumIndices; @@ -7159,8 +6866,6 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat  BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME); -	  	BOOL flat = mTypeMask & FLAT_MASK;  	U8 sculpt_type = volume->getParams().getSculptType(); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 76cf9de613..c845556557 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -54,6 +54,7 @@ class LLVolumeTriangle;  #include "llstrider.h"  #include "v4coloru.h"  #include "llrefcount.h" +#include "llpointer.h"  #include "llfile.h"  //============================================================================ @@ -919,6 +920,10 @@ public:  	LLVector2*  mTexCoords;  	U16* mIndices; +	//vertex buffer filled in by LLFace to cache this volume face geometry in vram  +	// (declared as a LLPointer to LLRefCount to avoid dependency on LLVertexBuffer) +	mutable LLPointer<LLRefCount> mVertexBuffer;  +  	std::vector<S32>	mEdge;  	//list of skin weights for rigged volumes @@ -1018,17 +1023,6 @@ public:  								   LLVector3* normal = NULL,  								   LLVector3* bi_normal = NULL); -	// The following cleans up vertices and triangles, -	// getting rid of degenerate triangles and duplicate vertices, -	// and allocates new arrays with the clean data. -	static BOOL cleanupTriangleData( const S32 num_input_vertices, -								const std::vector<Point> &input_vertices, -								const S32 num_input_triangles, -								S32 *input_triangles, -								S32 &num_output_vertices, -								LLVector3 **output_vertices, -								S32 &num_output_triangles, -								S32 **output_triangles);  	LLFaceID generateFaceMask();  	BOOL isFaceMaskValid(LLFaceID face_mask); diff --git a/indra/llmath/llvolumemgr.cpp b/indra/llmath/llvolumemgr.cpp index c60b750088..9083273ee5 100644 --- a/indra/llmath/llvolumemgr.cpp +++ b/indra/llmath/llvolumemgr.cpp @@ -26,7 +26,6 @@  #include "linden_common.h"  #include "llvolumemgr.h" -#include "llmemtype.h"  #include "llvolume.h" @@ -182,7 +181,6 @@ void LLVolumeMgr::insertGroup(LLVolumeLODGroup* volgroup)  // protected  LLVolumeLODGroup* LLVolumeMgr::createNewGroup(const LLVolumeParams& volume_params)  { -	LLMemType m1(LLMemType::MTYPE_VOLUME);  	LLVolumeLODGroup* volgroup = new LLVolumeLODGroup(volume_params);  	insertGroup(volgroup);  	return volgroup; @@ -297,7 +295,6 @@ LLVolume* LLVolumeLODGroup::refLOD(const S32 detail)  	mRefs++;  	if (mVolumeLODs[detail].isNull())  	{ -		LLMemType m1(LLMemType::MTYPE_VOLUME);  		mVolumeLODs[detail] = new LLVolume(mVolumeParams, mDetailScales[detail]);  	}  	mLODRefs[detail]++; diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp index b5a935c2b5..cc83cb7235 100644 --- a/indra/llmath/llvolumeoctree.cpp +++ b/indra/llmath/llvolumeoctree.cpp @@ -131,7 +131,7 @@ void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode<LLVolumeTriangle>  void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode<LLVolumeTriangle>* node)  {  	for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter =  -			node->getData().begin(); iter != node->getData().end(); ++iter) +			node->getDataBegin(); iter != node->getDataEnd(); ++iter)  	{  		const LLVolumeTriangle* tri = *iter; @@ -236,8 +236,8 @@ void LLVolumeOctreeValidate::visit(const LLOctreeNode<LLVolumeTriangle>* branch)  	}  	//children fit, check data -	for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getData().begin();  -			iter != branch->getData().end(); ++iter) +	for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getDataBegin();  +			iter != branch->getDataEnd(); ++iter)  	{  		const LLVolumeTriangle* tri = *iter; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 688d91dc40..9ae34a0c4e 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -37,9 +37,19 @@  class LLVolumeTriangle : public LLRefCount  {  public: +	void* operator new(size_t size) +	{ +		return ll_aligned_malloc_16(size); +	} + +	void operator delete(void* ptr) +	{ +		ll_aligned_free_16(ptr); +	} +  	LLVolumeTriangle()  	{ -		 +		mBinIndex = -1;	  	}  	LLVolumeTriangle(const LLVolumeTriangle& rhs) @@ -58,21 +68,38 @@ public:  	} -	LLVector4a mPositionGroup; +	LL_ALIGN_16(LLVector4a mPositionGroup);  	const LLVector4a* mV[3];  	U16 mIndex[3];  	F32 mRadius; +	mutable S32 mBinIndex; +  	virtual const LLVector4a& getPositionGroup() const;  	virtual const F32& getBinRadius() const; +	 +	S32 getBinIndex() const { return mBinIndex; } +	void setBinIndex(S32 idx) const { mBinIndex = idx; } + +  };  class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle>  {  public: +	void* operator new(size_t size) +	{ +		return ll_aligned_malloc_16(size); +	} + +	void operator delete(void* ptr) +	{ +		ll_aligned_free_16(ptr); +	} +  	LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node);  	~LLVolumeOctreeListener(); @@ -99,8 +126,8 @@ public:  public: -	LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) -	LLVector4a mExtents[2]; // extents (min, max) of this node and all its children +	LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) +	LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children  };  class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle> diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp new file mode 100644 index 0000000000..ac0c45ae6f --- /dev/null +++ b/indra/llmath/tests/alignment_test.cpp @@ -0,0 +1,128 @@ +/** + * @file v3dmath_test.cpp + * @author Vir + * @date 2011-12 + * @brief v3dmath test cases. + * + * $LicenseInfo:firstyear=2011&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +// Tests related to allocating objects with alignment constraints, particularly for SSE support. + +#include "linden_common.h" +#include "../test/lltut.h" +#include "../llmath.h" +#include "../llsimdmath.h" +#include "../llvector4a.h" + +void* operator new(size_t size) +{ +	return ll_aligned_malloc_16(size); +} + +void operator delete(void *p) +{ +	ll_aligned_free_16(p); +} + +namespace tut +{ + +#define is_aligned(ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr))%(alignment)==0) +#define is_aligned_relative(ptr,base_ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr)-reinterpret_cast<uintptr_t>(base_ptr))%(alignment)==0) + +struct alignment_test {}; + +typedef test_group<alignment_test> alignment_test_t; +typedef alignment_test_t::object alignment_test_object_t; +tut::alignment_test_t tut_alignment_test("LLAlignment"); + +LL_ALIGN_PREFIX(16) +class MyVector4a +{ +	LLQuad mQ; +} LL_ALIGN_POSTFIX(16); + + +// Verify that aligned allocators perform as advertised. +template<> template<> +void alignment_test_object_t::test<1>() +{ +#   ifdef LL_DEBUG +	skip("This test fails on Windows when compiled in debug mode."); +#   endif +	 +	const int num_tests = 7; +	void *align_ptr; +	for (int i=0; i<num_tests; i++) +	{ +		align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a)); +		ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16)); + +		align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a)); +		ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16)); + +		ll_aligned_free_16(align_ptr); + +		align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a)); +		ensure("ll_aligned_malloc_32 failed", is_aligned(align_ptr,32)); +		ll_aligned_free_32(align_ptr); +	} +} + +// In-place allocation of objects and arrays. +template<> template<> +void alignment_test_object_t::test<2>() +{ +	MyVector4a vec1; +	ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16)); +	 +	MyVector4a veca[12]; +	ensure("LLAlignment veca unaligned", is_aligned(veca,16)); +} + +// Heap allocation of objects and arrays. +template<> template<> +void alignment_test_object_t::test<3>() +{ +#   ifdef LL_DEBUG +	skip("This test fails on Windows when compiled in debug mode."); +#   endif +	 +	const int ARR_SIZE = 7; +	for(int i=0; i<ARR_SIZE; i++) +	{ +		MyVector4a *vecp = new MyVector4a; +		ensure("LLAlignment vecp unaligned", is_aligned(vecp,16)); +		delete vecp; +	} + +	MyVector4a *veca = new MyVector4a[ARR_SIZE]; +	ensure("LLAligment veca base", is_aligned(veca,16)); +	for(int i=0; i<ARR_SIZE; i++) +	{ +		std::cout << "veca[" << i << "]" << std::endl; +		ensure("LLAlignment veca member unaligned", is_aligned(&veca[i],16)); +	} +} + +}  | 
