summary | refs | log | tree | commit | diff
path: root/indra
diff options
context:
space:
mode:
author: Brad Payne (Vir Linden) <vir@lindenlab.com> 2011-12-21 17:02:47 -0500
committer: Brad Payne (Vir Linden) <vir@lindenlab.com> 2011-12-21 17:02:47 -0500
commit: c8682722ad6b889b13ce288c417cb6b82ed273ac (patch)
tree: 234f76107be807c1bd36d8c1664364ee17421507 /indra
parent: 461241a6300dcf29b06b2403c824fefd1d91a8fd (diff)
SH-2789 WIP - aligned alloc and realloc
Diffstat (limited to 'indra')
-rwxr-xr-x indra/llcommon/llmemory.h 15
-rwxr-xr-x indra/llmath/llvector4a.cpp 5
-rwxr-xr-x indra/llmath/llvolume.cpp 22
-rwxr-xr-x [-rw-r--r--] indra/newview/llpolymesh.cpp 24
4 files changed, 41 insertions, 25 deletions
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index c61d06e924..3eaf700bf1 100755
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -46,7 +46,7 @@ inline void ll_aligned_free( void* ptr )
inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
{
#if defined(LL_WINDOWS)
- return _mm_malloc(size, 16);
+ return _aligned_malloc(size, 16);
#elif defined(LL_DARWIN)
return malloc(size); // default osx malloc is 16 byte aligned.
#else
@@ -58,10 +58,21 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi
#endif
}
+inline void* ll_aligned_realloc_16(void* ptr, size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
+{
+#if defined(LL_WINDOWS)
+ return _aligned_realloc(ptr, size, 16);
+#elif defined(LL_DARWIN)
+ return realloc(ptr,size); // default osx malloc is 16 byte aligned.
+#else
+ return realloc(ptr,size); // FIXME not guaranteed to be aligned.
+#endif
+}
+
inline void ll_aligned_free_16(void *p)
{
#if defined(LL_WINDOWS)
- _mm_free(p);
+ _aligned_free(p);
#elif defined(LL_DARWIN)
return free(p);
#else
diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 7602ef0cb2..480ccf4ed9 100755
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,14 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
/*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
{
- memcpy((void*)dst,(const void*)src,bytes);
-#if 0
+// memcpy((void*)dst,(const void*)src,bytes);
+#if 1
assert(src != NULL);
assert(dst != NULL);
assert(bytes > 0);
assert((bytes % sizeof(F32))== 0);
ll_assert_aligned(src,16);
ll_assert_aligned(dst,16);
+ assert(bytes%16==0);
F32* end = dst + (bytes / sizeof(F32) );
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 113d4835bb..9499ca29ac 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -6914,14 +6914,17 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
// S32 old_size = mNumVertices*16;
//positions
- mPositions = (LLVector4a*) realloc(mPositions, new_size);
+ mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size);
+ ll_assert_aligned(mPositions,16);
//normals
- mNormals = (LLVector4a*) realloc(mNormals, new_size);
-
+ mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size);
+ ll_assert_aligned(mNormals,16);
+
//tex coords
new_size = ((new_verts*8)+0xF) & ~0xF;
- mTexCoords = (LLVector2*) realloc(mTexCoords, new_size);
+ mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size);
+ ll_assert_aligned(mTexCoords,16);
//just clear binormals
@@ -6974,7 +6977,8 @@ void LLVolumeFace::pushIndex(const U16& idx)
S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;
if (new_size != old_size)
{
- mIndices = (U16*) realloc(mIndices, new_size);
+ mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size);
+ ll_assert_aligned(mIndices,16);
}
mIndices[mNumIndices++] = idx;
@@ -7015,11 +7019,11 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
}
//allocate new buffer space
- mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a));
+ mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a));
ll_assert_aligned(mPositions, 16);
- mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a));
+ mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a));
ll_assert_aligned(mNormals, 16);
- mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
+ mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
ll_assert_aligned(mTexCoords, 16);
mNumVertices = new_count;
@@ -7066,7 +7070,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
new_count = mNumIndices + face.mNumIndices;
//allocate new index buffer
- mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
+ mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
//get destination address into new index buffer
U16* dst_idx = mIndices+mNumIndices;
diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp
index 450f9b2be7..0860506086 100644..100755
--- a/indra/newview/llpolymesh.cpp
+++ b/indra/newview/llpolymesh.cpp
@@ -129,22 +129,22 @@ void LLPolyMeshSharedData::freeMeshData()
{
mNumVertices = 0;
- delete [] mBaseCoords;
+ ll_aligned_free_16(mBaseCoords);
mBaseCoords = NULL;
- delete [] mBaseNormals;
+ ll_aligned_free_16(mBaseNormals);
mBaseNormals = NULL;
- delete [] mBaseBinormals;
+ ll_aligned_free_16(mBaseBinormals);
mBaseBinormals = NULL;
- delete [] mTexCoords;
+ ll_aligned_free_16(mTexCoords);
mTexCoords = NULL;
- delete [] mDetailTexCoords;
+ ll_aligned_free_16(mDetailTexCoords);
mDetailTexCoords = NULL;
- delete [] mWeights;
+ ll_aligned_free_16(mWeights);
mWeights = NULL;
}
@@ -229,12 +229,12 @@ U32 LLPolyMeshSharedData::getNumKB()
BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices )
{
U32 i;
- mBaseCoords = new LLVector3[ numVertices ];
- mBaseNormals = new LLVector3[ numVertices ];
- mBaseBinormals = new LLVector3[ numVertices ];
- mTexCoords = new LLVector2[ numVertices ];
- mDetailTexCoords = new LLVector2[ numVertices ];
- mWeights = new F32[ numVertices ];
+ mBaseCoords = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+ mBaseNormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+ mBaseBinormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+ mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2));
+ mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2));
+ mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32));
for (i = 0; i < numVertices; i++)
{
mWeights[i] = 0.f;