diff options
| -rw-r--r-- | indra/llcommon/llmemory.h | 32 | ||||
| -rw-r--r-- | indra/llmath/lloctree.h | 6 | ||||
| -rw-r--r-- | indra/llmath/llvolume.cpp | 89 | ||||
| -rw-r--r-- | indra/llrender/llrender.cpp | 8 | ||||
| -rw-r--r-- | indra/llrender/llrender.h | 1 | ||||
| -rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 17 | ||||
| -rw-r--r-- | indra/newview/lldrawable.cpp | 4 | ||||
| -rw-r--r-- | indra/newview/llface.cpp | 8 | ||||
| -rw-r--r-- | indra/newview/llspatialpartition.cpp | 14 | 
9 files changed, 102 insertions, 77 deletions
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 117268cfe7..1c8c91f57e 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -34,7 +34,7 @@  #include <stdlib.h> -inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free(). +inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().  {  #if defined(LL_WINDOWS)  	return _mm_malloc(size, 16); @@ -43,13 +43,9 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi  #else  	void *rtn;  	if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size))) -	{  		return rtn; -	}  	else // bad alignment requested, or out of memory -	{  		return NULL; -	}  #endif  } @@ -64,6 +60,32 @@ inline void ll_aligned_free_16(void *p)  #endif  } +inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). +{ +#if defined(LL_WINDOWS) +	return _mm_malloc(size, 32); +#elif defined(LL_DARWIN) +# error implement me. +#else +	void *rtn; +	if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size))) +		return rtn; +	else // bad alignment requested, or out of memory +		return NULL; +#endif +} + +inline void ll_aligned_free_32(void *p) +{ +#if defined(LL_WINDOWS) +	_mm_free(p); +#elif defined(LL_DARWIN) +# error implement me. +#else +	free(p); // posix_memalign() is compatible with heap deallocator +#endif +} +  class LL_COMMON_API LLMemory  {  public: diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index f61ce6ce05..59828ae565 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -102,7 +102,7 @@ public:  	:	mParent((oct_node*)parent),   		mOctant(octant)   	{  -		mD = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*4, 16); +		mD = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*4);  		mD[CENTER] = center;  		mD[SIZE] = size; @@ -125,7 +125,7 @@ public:  			delete getChild(i);  		}  -		_mm_free(mD); +		ll_aligned_free_16(mD);  	}  	inline const BaseType* getParent()	const			{ return mParent; } @@ -640,7 +640,7 @@ public:  		const LLVector4a& v = data->getPositionGroup();  		LLVector4a val; -		val.setSub(v, mD[CENTER]); +		val.setSub(v, BaseType::mD[BaseType::CENTER]);  		val.setAbs(val);  		S32 lt = val.lessThan4(MAX_MAG).getComparisonMask() & 0x7; diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 8cb9475994..09ab47b890 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -31,6 +31,7 @@   */  #include "linden_common.h" +#include "llmemory.h"  #include "llmath.h"  #include <set> @@ -1987,7 +1988,7 @@ BOOL LLVolume::generate()  void LLVolumeFace::VertexData::init()  { -	mData = (LLVector4a*) _mm_malloc(32, 16); +	mData = (LLVector4a*) ll_aligned_malloc_16(32);  }  LLVolumeFace::VertexData::VertexData() @@ -2004,7 +2005,7 @@ LLVolumeFace::VertexData::VertexData(const VertexData& rhs)  LLVolumeFace::VertexData::~VertexData()  { -	_mm_free(mData); +	ll_aligned_free_16(mData);  }  LLVector4a& LLVolumeFace::VertexData::getPosition() @@ -4406,7 +4407,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en  				face.createOctree();  			} -			LLVector4a* p = (LLVector4a*) face.mPositions; +			//LLVector4a* p = (LLVector4a*) face.mPositions;  			LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal);  			intersect.traverse(face.mOctree); @@ -5196,7 +5197,7 @@ LLVolumeFace::LLVolumeFace() :  	mWeights(NULL),  	mOctree(NULL)  { -	mExtents = (LLVector4a*) _mm_malloc(48, 16); +	mExtents = (LLVector4a*) ll_aligned_malloc_16(48);  	mCenter = mExtents+2;  } @@ -5217,7 +5218,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)  	mWeights(NULL),  	mOctree(NULL)  {  -	mExtents = (LLVector4a*) _mm_malloc(48, 16); +	mExtents = (LLVector4a*) ll_aligned_malloc_16(48);  	mCenter = mExtents+2;  	*this = src;  } @@ -5264,7 +5265,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)  		}  		else  		{ -			_mm_free(mBinormals); +			ll_aligned_free_16(mBinormals);  			mBinormals = NULL;  		} @@ -5275,7 +5276,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)  		}  		else  		{ -			_mm_free(mWeights); +			ll_aligned_free_16(mWeights);  			mWeights = NULL;  		}  	} @@ -5295,7 +5296,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)  LLVolumeFace::~LLVolumeFace()  { -	_mm_free(mExtents); +	ll_aligned_free_16(mExtents);  	mExtents = NULL;  	freeData(); @@ -5303,17 +5304,17 @@ LLVolumeFace::~LLVolumeFace()  void LLVolumeFace::freeData()  { -	_mm_free(mPositions); +	ll_aligned_free_16(mPositions);  	mPositions = NULL; -	_mm_free(mNormals); +	ll_aligned_free_16(mNormals);  	mNormals = NULL; -	_mm_free(mTexCoords); +	ll_aligned_free_16(mTexCoords);  	mTexCoords = NULL; -	_mm_free(mIndices); +	ll_aligned_free_16(mIndices);  	mIndices = NULL; -	_mm_free(mBinormals); +	ll_aligned_free_16(mBinormals);  	mBinormals = NULL; -	_mm_free(mWeights); +	ll_aligned_free_16(mWeights);  	mWeights = NULL;  	delete mOctree; @@ -6084,21 +6085,21 @@ void LLVolumeFace::createBinormals()  void LLVolumeFace::resizeVertices(S32 num_verts)  { -	_mm_free(mPositions); -	_mm_free(mNormals); -	_mm_free(mBinormals); -	_mm_free(mTexCoords); +	ll_aligned_free_16(mPositions); +	ll_aligned_free_16(mNormals); +	ll_aligned_free_16(mBinormals); +	ll_aligned_free_16(mTexCoords);  	mBinormals = NULL;  	if (num_verts)  	{ -		mPositions = (LLVector4a*) _mm_malloc(num_verts*16, 16); -		mNormals = (LLVector4a*) _mm_malloc(num_verts*16, 16); +		mPositions = (LLVector4a*) ll_aligned_malloc_16(num_verts*16); +		mNormals = (LLVector4a*) ll_aligned_malloc_16(num_verts*16);  		//pad texture coordinate block end to allow for QWORD reads  		S32 size = ((num_verts*8) + 0xF) & ~0xF; -		mTexCoords = (LLVector2*) _mm_malloc(size, 16); +		mTexCoords = (LLVector2*) ll_aligned_malloc_16(size);  	}  	else  	{ @@ -6121,20 +6122,20 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con  	S32 new_size = new_verts*16;  	//positions -	LLVector4a* dst = (LLVector4a*) _mm_malloc(new_size, 16); +	LLVector4a* dst = (LLVector4a*) ll_aligned_malloc_16(new_size);  	if (mPositions)  	{  		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, new_size/4); -		_mm_free(mPositions); +		ll_aligned_free_16(mPositions);  	}  	mPositions = dst;  	//normals -	dst = (LLVector4a*) _mm_malloc(new_size, 16); +	dst = (LLVector4a*) ll_aligned_malloc_16(new_size);  	if (mNormals)  	{  		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, new_size/4); -		_mm_free(mNormals); +		ll_aligned_free_16(mNormals);  	}  	mNormals = dst; @@ -6142,16 +6143,16 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con  	new_size = ((new_verts*8)+0xF) & ~0xF;  	{ -		LLVector2* dst = (LLVector2*) _mm_malloc(new_size, 16); +		LLVector2* dst = (LLVector2*) ll_aligned_malloc_16(new_size);  		if (mTexCoords)  		{  			LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, new_size/4); -			_mm_free(mTexCoords); +			ll_aligned_free_16(mTexCoords);  		}  	}  	//just clear binormals -	_mm_free(mBinormals); +	ll_aligned_free_16(mBinormals);  	mBinormals = NULL;  	mPositions[mNumVertices] = pos; @@ -6163,26 +6164,26 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con  void LLVolumeFace::allocateBinormals(S32 num_verts)  { -	_mm_free(mBinormals); -	mBinormals = (LLVector4a*) _mm_malloc(num_verts*16, 16); +	ll_aligned_free_16(mBinormals); +	mBinormals = (LLVector4a*) ll_aligned_malloc_16(num_verts*16);  }  void LLVolumeFace::allocateWeights(S32 num_verts)  { -	_mm_free(mWeights); -	mWeights = (LLVector4a*) _mm_malloc(num_verts*16, 16); +	ll_aligned_free_16(mWeights); +	mWeights = (LLVector4a*) ll_aligned_malloc_16(num_verts*16);  }  void LLVolumeFace::resizeIndices(S32 num_indices)  { -	_mm_free(mIndices); +	ll_aligned_free_16(mIndices);  	if (num_indices)  	{  		//pad index block end to allow for QWORD reads  		S32 size = ((num_indices*2) + 0xF) & ~0xF; -		mIndices = (U16*) _mm_malloc(size,16);	 +		mIndices = (U16*) ll_aligned_malloc_16(size);	  	}  	else  	{ @@ -6200,9 +6201,9 @@ void LLVolumeFace::pushIndex(const U16& idx)  	S32 old_size = (mNumIndices+0xF) & ~0xF;  	if (new_size != old_size)  	{ -		U16* dst = (U16*) _mm_malloc(new_size, 16); +		U16* dst = (U16*) ll_aligned_malloc_16(new_size);  		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, new_size/4); -		_mm_free(mIndices); +		ll_aligned_free_16(mIndices);  		mIndices = dst;  	} @@ -6239,17 +6240,17 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat  	} -	LLVector4a* new_pos = (LLVector4a*) _mm_malloc(new_count*16, 16); -	LLVector4a* new_norm = (LLVector4a*) _mm_malloc(new_count*16, 16); -	LLVector2* new_tc = (LLVector2*) _mm_malloc((new_count*8+0xF) & ~0xF, 16); +	LLVector4a* new_pos = (LLVector4a*) ll_aligned_malloc_16(new_count*16); +	LLVector4a* new_norm = (LLVector4a*) ll_aligned_malloc_16(new_count*16); +	LLVector2* new_tc = (LLVector2*) ll_aligned_malloc_16((new_count*8+0xF) & ~0xF);  	LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, new_count*4);  	LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, new_count*4);  	LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, new_count*2); -	_mm_free(mPositions); -	_mm_free(mNormals); -	_mm_free(mTexCoords); +	ll_aligned_free_16(mPositions); +	ll_aligned_free_16(mNormals); +	ll_aligned_free_16(mTexCoords);  	mPositions = new_pos;  	mNormals = new_norm; @@ -6289,9 +6290,9 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat  	new_count = mNumIndices + face.mNumIndices; -	U16* new_indices = (U16*) _mm_malloc((new_count*2+0xF) & ~0xF, 16); +	U16* new_indices = (U16*) ll_aligned_malloc_16((new_count*2+0xF) & ~0xF);  	LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, new_count/2); -	_mm_free(mIndices); +	ll_aligned_free_16(mIndices);  	mIndices = new_indices;  	mNumIndices = new_count; diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index 3f70ccacb1..70601663e6 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -777,8 +777,8 @@ LLRender::LLRender()  	mTexcoordsp = tc.get();  	mColorsp = color.get(); -	mUIOffset = (LLVector4a*) _mm_malloc(LL_MAX_UI_STACK_DEPTH*sizeof(LLVector4a), 16); -	mUIScale = (LLVector4a*) _mm_malloc(LL_MAX_UI_STACK_DEPTH*sizeof(LLVector4a), 16); +	mUIOffset = (LLVector4a*) ll_aligned_malloc_16(LL_MAX_UI_STACK_DEPTH*sizeof(LLVector4a)); +	mUIScale = (LLVector4a*) ll_aligned_malloc_16(LL_MAX_UI_STACK_DEPTH*sizeof(LLVector4a));  	mTexUnits.reserve(LL_NUM_TEXTURE_LAYERS);  	for (U32 i = 0; i < LL_NUM_TEXTURE_LAYERS; i++) @@ -816,9 +816,9 @@ void LLRender::shutdown()  	delete mDummyTexUnit;  	mDummyTexUnit = NULL; -	_mm_free(mUIOffset); +	ll_aligned_free_16(mUIOffset);  	mUIOffset = NULL; -	_mm_free(mUIScale); +	ll_aligned_free_16(mUIScale);  	mUIScale = NULL;  } diff --git a/indra/llrender/llrender.h b/indra/llrender/llrender.h index 2bacf16dc6..11cd95646f 100644 --- a/indra/llrender/llrender.h +++ b/indra/llrender/llrender.h @@ -45,6 +45,7 @@  #include "v4coloru.h"  #include "llstrider.h"  #include "llpointer.h" +#include "llmemory.h"  #include "llglheaders.h"  class LLVertexBuffer; diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 4f2dae0cdf..48c20b09a8 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -31,6 +31,7 @@   */  #include "linden_common.h" +#include "llmemory.h"  #include <boost/static_assert.hpp> @@ -627,7 +628,7 @@ void LLVertexBuffer::createGLBuffer()  	{  		static int gl_buffer_idx = 0;  		mGLBuffer = ++gl_buffer_idx; -		mMappedData = (U8*) _mm_malloc(size, 16); +		mMappedData = (U8*) ll_aligned_malloc_16(size);  		memset(mMappedData, 0, size);  	}  } @@ -662,7 +663,7 @@ void LLVertexBuffer::createGLIndices()  	}  	else  	{ -		mMappedIndexData = (U8*) _mm_malloc(size, 16); +		mMappedIndexData = (U8*) ll_aligned_malloc_16(size);  		static int gl_buffer_idx = 0;  		mGLIndices = ++gl_buffer_idx;  	} @@ -683,7 +684,7 @@ void LLVertexBuffer::destroyGLBuffer()  		}  		else  		{ -			_mm_free(mMappedData); +			ll_aligned_free_16(mMappedData);  			mMappedData = NULL;  			mEmpty = TRUE;  		} @@ -710,7 +711,7 @@ void LLVertexBuffer::destroyGLIndices()  		}  		else  		{ -			_mm_free(mMappedIndexData); +			ll_aligned_free_16(mMappedIndexData);  			mMappedIndexData = NULL;  			mEmpty = TRUE;  		} @@ -846,8 +847,8 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices)  			{  				if (!useVBOs())  				{ -					_mm_free(mMappedData); -					mMappedData = (U8*) _mm_malloc(newsize, 16); +					ll_aligned_free_16(mMappedData); +					mMappedData = (U8*) ll_aligned_malloc_16(newsize);  				}  				mResized = TRUE;  			} @@ -867,8 +868,8 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices)  			{  				if (!useVBOs())  				{ -					_mm_free(mMappedIndexData); -					mMappedIndexData = (U8*) _mm_malloc(new_index_size, 16); +					ll_aligned_free_16(mMappedIndexData); +					mMappedIndexData = (U8*) ll_aligned_malloc_16(new_index_size);  				}  				mResized = TRUE;  			} diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp index 04e433dcfd..ca408a309e 100644 --- a/indra/newview/lldrawable.cpp +++ b/indra/newview/lldrawable.cpp @@ -95,7 +95,7 @@ void LLDrawable::incrementVisible()  void LLDrawable::init()  { -	mExtents = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*3, 32); +	mExtents = (LLVector4a*) ll_aligned_malloc_32(sizeof(LLVector4a)*3);  	mPositionGroup = mExtents + 2;  	// mXform @@ -150,7 +150,7 @@ void LLDrawable::destroy()  		llinfos << "- Zombie drawables: " << sNumZombieDrawables << llendl;  	}*/	 -	_mm_free(mExtents); +	ll_aligned_free_32(mExtents);  	mExtents = mPositionGroup = NULL;  } diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 0b5cf78261..dea160ae64 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -152,7 +152,7 @@ void cylindricalProjection(LLVector2 &tc, const LLVector4a& normal, const LLVect  void LLFace::init(LLDrawable* drawablep, LLViewerObject* objp)  { -	mExtents = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*2, 16); +	mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*2);  	mLastUpdateTime = gFrameTimeSeconds;  	mLastMoveTime = 0.f; @@ -269,7 +269,7 @@ void LLFace::destroy()  		mVObjp = NULL;  	} -	_mm_free(mExtents); +	ll_aligned_free_16(mExtents);  	mExtents = NULL;  } @@ -1328,8 +1328,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  					for (S32 i = 0; i < num_vertices; i++)  					{	  						LLVector2 tc(vf.mTexCoords[i]); -						LLVector4a& norm = vf.mNormals[i]; -						LLVector4a& center = *(vf.mCenter); +						//LLVector4a& norm = vf.mNormals[i]; +						//LLVector4a& center = *(vf.mCenter);  						LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);  						tmp = tmp * *mTextureMatrix; diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 77d36b1c2e..d0e3a1428f 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -239,7 +239,7 @@ void LLSpatialGroup::buildOcclusion()  {  	if (!mOcclusionVerts)  	{ -		mOcclusionVerts = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*8, 16); +		mOcclusionVerts = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*8);  	}  	LLVector4a fudge; @@ -342,13 +342,13 @@ LLSpatialGroup::~LLSpatialGroup()  		sQueryPool.release(mOcclusionQuery[LLViewerCamera::sCurCameraID]);  	} -	_mm_free(mOcclusionVerts); +	ll_aligned_free_16(mOcclusionVerts);  	LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION);  	clearDrawMap();  	clearAtlasList() ; -	_mm_free(mBounds); +	ll_aligned_free_16(mBounds);  }  BOOL LLSpatialGroup::hasAtlas(LLTextureAtlas* atlasp) @@ -1163,7 +1163,7 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) :  	sNodeCount++;  	LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); -	mBounds = (LLVector4a*) _mm_malloc(sizeof(LLVector4a) * V4_COUNT, 16); +	mBounds = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a) * V4_COUNT);  	mExtents = mBounds + EXTENTS;  	mObjectBounds = mBounds + OBJECT_BOUNDS;  	mObjectExtents = mBounds + OBJECT_EXTENTS; @@ -1434,7 +1434,7 @@ void LLSpatialGroup::destroyGL()  		}  	} -	_mm_free(mOcclusionVerts); +	ll_aligned_free_16(mOcclusionVerts);  	mOcclusionVerts = NULL;  	for (LLSpatialGroup::element_iter i = getData().begin(); i != getData().end(); ++i) @@ -3557,7 +3557,7 @@ LLDrawInfo::LLDrawInfo(U16 start, U16 end, U32 count, U32 offset,  	mDrawMode(LLRender::TRIANGLES)  {  	mVertexBuffer->validateRange(mStart, mEnd, mCount, mOffset); -	mExtents = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*2, 16); +	mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*2);  	mDebugColor = (rand() << 16) + rand();  } @@ -3579,7 +3579,7 @@ LLDrawInfo::~LLDrawInfo()  		gPipeline.checkReferences(this);  	} -	_mm_free(mExtents); +	ll_aligned_free_16(mExtents);  }  void LLDrawInfo::validate()  | 
