diff options
| -rwxr-xr-x | indra/llcommon/llmemory.h | 15 | ||||
| -rwxr-xr-x | indra/llmath/llvector4a.cpp | 5 | ||||
| -rwxr-xr-x | indra/llmath/llvolume.cpp | 22 | ||||
| -rwxr-xr-x[-rw-r--r--] | indra/newview/llpolymesh.cpp | 24 | 
4 files changed, 41 insertions, 25 deletions
| diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index c61d06e924..3eaf700bf1 100755 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -46,7 +46,7 @@ inline void ll_aligned_free( void* ptr )  inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().  {  #if defined(LL_WINDOWS) -	return _mm_malloc(size, 16); +	return _aligned_malloc(size, 16);  #elif defined(LL_DARWIN)  	return malloc(size); // default osx malloc is 16 byte aligned.  #else @@ -58,10 +58,21 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi  #endif  } +inline void* ll_aligned_realloc_16(void* ptr, size_t size) // returned hunk MUST be freed with ll_aligned_free_16(). +{ +#if defined(LL_WINDOWS) +	return _aligned_realloc(ptr, size, 16); +#elif defined(LL_DARWIN) +	return realloc(ptr,size); // default osx malloc is 16 byte aligned. +#else +	return realloc(ptr,size); // FIXME not guaranteed to be aligned. +#endif +} +  inline void ll_aligned_free_16(void *p)  {  #if defined(LL_WINDOWS) -	_mm_free(p); +	_aligned_free(p);  #elif defined(LL_DARWIN)  	return free(p);  #else diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 7602ef0cb2..480ccf4ed9 100755 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,14 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F  /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)  { -	memcpy((void*)dst,(const void*)src,bytes); -#if 0 +//	memcpy((void*)dst,(const void*)src,bytes); +#if 1  	assert(src != NULL);  	assert(dst != NULL);  	assert(bytes > 0);  	assert((bytes % sizeof(F32))== 0);   	ll_assert_aligned(src,16);  	ll_assert_aligned(dst,16); +	assert(bytes%16==0);  	F32* end = dst + (bytes / sizeof(F32) ); diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 113d4835bb..9499ca29ac 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -6914,14 +6914,17 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con  //	S32 old_size = mNumVertices*16;  	//positions -	mPositions = (LLVector4a*) realloc(mPositions, new_size); +	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size); +	ll_assert_aligned(mPositions,16);  	//normals -	mNormals = (LLVector4a*) realloc(mNormals, new_size); -	 +	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size); +	ll_assert_aligned(mNormals,16); +  	//tex coords  	new_size = ((new_verts*8)+0xF) & ~0xF; -	mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); +	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size); +	ll_assert_aligned(mTexCoords,16);  	//just clear binormals @@ -6974,7 +6977,8 @@ void LLVolumeFace::pushIndex(const U16& idx)  	S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;  	if (new_size != old_size)  	{ -		mIndices = (U16*) realloc(mIndices, new_size); +		mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size); +		ll_assert_aligned(mIndices,16);  	}  	mIndices[mNumIndices++] = idx; @@ -7015,11 +7019,11 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat  	}  	//allocate new buffer space -	mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); +	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a));  	ll_assert_aligned(mPositions, 16); -	mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); +	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a));  	ll_assert_aligned(mNormals, 16); -	mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); +	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);  	ll_assert_aligned(mTexCoords, 16);  	mNumVertices = new_count; @@ -7066,7 +7070,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat  	new_count = mNumIndices + face.mNumIndices;  	//allocate new index buffer -	mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); +	mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);  	//get destination address into new index buffer  	U16* dst_idx = mIndices+mNumIndices; diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index 450f9b2be7..0860506086 100644..100755 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -129,22 +129,22 @@ void LLPolyMeshSharedData::freeMeshData()          {                  mNumVertices = 0; -                delete [] mBaseCoords; +                ll_aligned_free_16(mBaseCoords);                  mBaseCoords = NULL; -                delete [] mBaseNormals; +                ll_aligned_free_16(mBaseNormals);                  mBaseNormals = NULL; -                delete [] mBaseBinormals; +                ll_aligned_free_16(mBaseBinormals);                  mBaseBinormals = NULL; -                delete [] mTexCoords; +                ll_aligned_free_16(mTexCoords);                  mTexCoords = NULL; -                delete [] mDetailTexCoords; +                ll_aligned_free_16(mDetailTexCoords);                  mDetailTexCoords = NULL; -                delete [] mWeights; +                ll_aligned_free_16(mWeights);                  mWeights = NULL;          } @@ -229,12 +229,12 @@ U32 LLPolyMeshSharedData::getNumKB()  BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices )  {          U32 i; -        mBaseCoords = new LLVector3[ numVertices ]; -        mBaseNormals = new LLVector3[ numVertices ]; -        mBaseBinormals = new LLVector3[ numVertices ]; -        mTexCoords = new LLVector2[ numVertices ]; -        mDetailTexCoords = new LLVector2[ numVertices ]; -        mWeights = new F32[ numVertices ]; +        mBaseCoords = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); +        mBaseNormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); +        mBaseBinormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); +        mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); +        mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); +        mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32));          for (i = 0; i < numVertices; i++)          {                  mWeights[i] = 0.f; | 
