path: root/indra/llmath
author     Dave Parks <davep@lindenlab.com>  2013-03-04 18:01:42 -0600
committer  Dave Parks <davep@lindenlab.com>  2013-03-04 18:01:42 -0600
commit     609ed855e1160505238378a1be49e2b92e8496f5 (patch)
tree       aed340024162360eaf026f29e50a633e77926036  /indra/llmath
parent     eb859ce5c18d4310ae4828d6964b2e8c088ee95f (diff)
MAINT-2371 More optimizations.
Reviewed by Graham
Diffstat (limited to 'indra/llmath')
-rw-r--r--  indra/llmath/llvolume.cpp  220
-rw-r--r--  indra/llmath/llvolume.h      1
2 files changed, 127 insertions, 94 deletions
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 77d89568df..d614695efb 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -5180,6 +5180,7 @@ LLVolumeFace::LLVolumeFace() :
mNumS(0),
mNumT(0),
mNumVertices(0),
+ mNumAllocatedVertices(0),
mNumIndices(0),
mPositions(NULL),
mNormals(NULL),
@@ -5204,6 +5205,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
mNumS(0),
mNumT(0),
mNumVertices(0),
+ mNumAllocatedVertices(0),
mNumIndices(0),
mPositions(NULL),
mNormals(NULL),
@@ -5258,12 +5260,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
{
LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size);
}
- else
- {
- ll_aligned_free_16(mTexCoords) ;
- mTexCoords = NULL ;
- }
-
if (src.mBinormals)
{
@@ -5311,10 +5307,11 @@ void LLVolumeFace::freeData()
{
ll_aligned_free_16(mPositions);
mPositions = NULL;
- ll_aligned_free_16( mNormals);
+
+ //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately
mNormals = NULL;
- ll_aligned_free_16(mTexCoords);
mTexCoords = NULL;
+
ll_aligned_free_16(mIndices);
mIndices = NULL;
ll_aligned_free_16(mBinormals);
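
The freeData() change above relies on positions, normals, and texture coordinates sharing a single allocation: only the base pointer (mPositions) is returned to the allocator, and the interior pointers are cleared without being freed. A minimal standalone sketch of that ownership rule, with hypothetical names and std::free standing in for the viewer's aligned-free helpers:

#include <cstdlib>

// Hypothetical stand-in for the face's packed vertex streams; the real class
// keeps LLVector4a/LLVector2 pointers rather than raw floats.
struct PackedStreams
{
    float* positions;  // base of the single allocation (owns the memory)
    float* normals;    // interior pointer into the same block
    float* texcoords;  // interior pointer into the same block

    void freeData()
    {
        std::free(positions);  // one free releases all three streams
        positions = nullptr;
        normals   = nullptr;   // interior pointers: clear only, never free
        texcoords = nullptr;
    }
};
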
@@ -5496,18 +5493,6 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
llassert(new_face.mNumIndices == mNumIndices);
llassert(new_face.mNumVertices <= mNumVertices);
- if (angle_cutoff > 1.f && !mNormals)
- {
- ll_aligned_free_16(new_face.mNormals);
- new_face.mNormals = NULL;
- }
-
- if (!mTexCoords)
- {
- ll_aligned_free_16(new_face.mTexCoords);
- new_face.mTexCoords = NULL;
- }
-
swapData(new_face);
}
@@ -5708,35 +5693,44 @@ public:
void updateScores()
{
- for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i)
- { //trailing 3 vertices aren't actually in the cache for scoring purposes
- if (mCache[i])
+ LLVCacheVertexData** data_iter = mCache+MaxSizeVertexCache;
+ LLVCacheVertexData** end_data = mCache+MaxSizeVertexCache+3;
+
+ while(data_iter != end_data)
+ {
+ LLVCacheVertexData* data = *data_iter++;
+ //trailing 3 vertices aren't actually in the cache for scoring purposes
+ if (data)
{
- mCache[i]->mCacheTag = -1;
+ data->mCacheTag = -1;
}
}
- for (U32 i = 0; i < MaxSizeVertexCache; ++i)
+ data_iter = mCache;
+ end_data = mCache+MaxSizeVertexCache;
+
+ while (data_iter != end_data)
{ //update scores of vertices in cache
- if (mCache[i])
+ LLVCacheVertexData* data = *data_iter++;
+ if (data)
{
- mCache[i]->mScore = find_vertex_score(*(mCache[i]));
- llassert(mCache[i]->mCacheTag == i);
+ data->mScore = find_vertex_score(*data);
}
}
mBestTriangle = NULL;
//update triangle scores
- for (U32 i = 0; i < MaxSizeVertexCache+3; ++i)
+ data_iter = mCache;
+ end_data = mCache+MaxSizeVertexCache+3;
+
+ while (data_iter != end_data)
{
- LLVCacheVertexData* data = mCache[i];
+ LLVCacheVertexData* data = *data_iter++;
if (data)
{
- U32 count = data->mTriangles.size();
-
- for (U32 j = 0; j < count; ++j)
+ for (std::vector<LLVCacheTriangleData*>::iterator iter = data->mTriangles.begin(), end_iter = data->mTriangles.end(); iter != end_iter; ++iter)
{
- LLVCacheTriangleData* tri = data->mTriangles[j];
+ LLVCacheTriangleData* tri = *iter;
if (tri->mActive)
{
tri->mScore = tri->mVertex[0]->mScore;
@@ -5753,13 +5747,17 @@ public:
}
//knock trailing 3 vertices off the cache
- for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i)
+ data_iter = mCache+MaxSizeVertexCache;
+ end_data = mCache+MaxSizeVertexCache+3;
+ while (data_iter != end_data)
{
- if (mCache[i])
+ LLVCacheVertexData* data = *data_iter;
+ if (data)
{
- llassert(mCache[i]->mCacheTag == -1);
- mCache[i] = NULL;
+ llassert(data->mCacheTag == -1);
+ *data_iter = NULL;
}
+ ++data_iter;
}
}
};
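
The loop rewrites in updateScores() replace indexed accesses into mCache with a pointer walk against a precomputed end pointer, so each element is loaded once per iteration instead of being re-subscripted for every use. A self-contained sketch of the same pattern, with a hypothetical element type:

#include <cstddef>

struct VertexData { float score; int cacheTag; };

// Indexed form: cache[i] is subscripted on every touch.
void tag_indexed(VertexData* cache[], std::size_t n)
{
    for (std::size_t i = 0; i < n; ++i)
    {
        if (cache[i])
        {
            cache[i]->cacheTag = -1;
        }
    }
}

// Pointer-walk form used in the patch: hoist the end pointer and bind the
// element to a local once per iteration.
void tag_pointer_walk(VertexData** cache, std::size_t n)
{
    VertexData** iter = cache;
    VertexData** end  = cache + n;
    while (iter != end)
    {
        VertexData* data = *iter++;
        if (data)
        {
            data->cacheTag = -1;
        }
    }
}
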
@@ -5894,10 +5892,10 @@ void LLVolumeFace::cacheOptimize()
//allocate space for new buffer
S32 num_verts = mNumVertices;
- LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
- LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;
- LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size);
+ LLVector4a* pos = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64);
+ LLVector4a* norm = pos + num_verts;
+ LLVector2* tc = (LLVector2*) (norm + num_verts);
LLVector4a* wght = NULL;
if (mWeights)
@@ -5945,9 +5943,8 @@ void LLVolumeFace::cacheOptimize()
mIndices[i] = new_idx[mIndices[i]];
}
- ll_aligned_free_16(mPositions);
- ll_aligned_free_16(mNormals);
- ll_aligned_free_16(mTexCoords);
+ ll_aligned_free(mPositions);
+ // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer
ll_aligned_free_16(mWeights);
ll_aligned_free_16(mBinormals);
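
The cacheOptimize() allocation above folds three separately allocated buffers into one 64-byte-aligned block: num_verts positions, num_verts normals, then a texture-coordinate region padded to a 16-byte multiple. A sketch of the size arithmetic and pointer carving in standard C++, with stand-in vector types (the real code uses ll_aligned_malloc(size, 64), LLVector4a, and LLVector2):

#include <cstdlib>

struct Vec4 { float v[4]; };  // stands in for LLVector4a (16 bytes)
struct Vec2 { float v[2]; };  // stands in for LLVector2  (8 bytes)

// Carve positions, normals, and texcoords out of one 64-byte-aligned block.
void alloc_streams(std::size_t num_verts, Vec4*& pos, Vec4*& norm, Vec2*& tc)
{
    // Pad the texcoord region up to the next multiple of 16 bytes.
    std::size_t tc_bytes = (num_verts * sizeof(Vec2) + 0xF) & ~std::size_t(0xF);
    std::size_t total    = sizeof(Vec4) * 2 * num_verts + tc_bytes;

    // std::aligned_alloc requires the size to be a multiple of the alignment.
    total = (total + 63) & ~std::size_t(63);

    pos  = static_cast<Vec4*>(std::aligned_alloc(64, total));
    norm = pos + num_verts;                            // normals follow positions
    tc   = reinterpret_cast<Vec2*>(norm + num_verts);  // texcoords follow normals
}

Only pos is ever handed back to the allocator later; norm and tc are interior pointers, which is why the hunk above frees mPositions alone.
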
@@ -6664,24 +6661,22 @@ void LLVolumeFace::createBinormals()
void LLVolumeFace::resizeVertices(S32 num_verts)
{
- ll_aligned_free_16(mPositions);
- ll_aligned_free_16(mNormals);
+ ll_aligned_free(mPositions);
+ //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer
ll_aligned_free_16(mBinormals);
- ll_aligned_free_16(mTexCoords);
-
+
mBinormals = NULL;
if (num_verts)
{
- mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
- ll_assert_aligned(mPositions, 16);
- mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
- ll_assert_aligned(mNormals, 16);
-
//pad texture coordinate block end to allow for QWORD reads
S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;
- mTexCoords = (LLVector2*) ll_aligned_malloc_16(size);
- ll_assert_aligned(mTexCoords, 16);
+
+ mPositions = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64);
+ mNormals = mPositions+num_verts;
+ mTexCoords = (LLVector2*) (mNormals+num_verts);
+
+ ll_assert_aligned(mPositions, 64);
}
else
{
@@ -6691,6 +6686,7 @@ void LLVolumeFace::resizeVertices(S32 num_verts)
}
mNumVertices = num_verts;
+ mNumAllocatedVertices = num_verts;
}
void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv)
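
The ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF expression used throughout these hunks rounds the texcoord byte count up to the next multiple of 16, so the block can always be read in full 16-byte chunks. A quick compile-time check of the formula (helper name hypothetical):

#include <cstdint>

// Round a byte count up to the next multiple of 16.
constexpr std::uint32_t pad16(std::uint32_t bytes)
{
    return (bytes + 0xF) & ~std::uint32_t(0xF);
}

static_assert(pad16(0)  == 0,  "already aligned");
static_assert(pad16(8)  == 16, "one 8-byte LLVector2 pads to 16");
static_assert(pad16(16) == 16, "exact multiples are unchanged");
static_assert(pad16(24) == 32, "three LLVector2s pad to 32");
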
@@ -6701,27 +6697,43 @@ void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv)
void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc)
{
S32 new_verts = mNumVertices+1;
- S32 new_size = new_verts*16;
- S32 old_size = mNumVertices*16;
- //positions
- mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size);
- ll_assert_aligned(mPositions,16);
+ if (new_verts > mNumAllocatedVertices)
+ {
+ //double buffer size on expansion
+ new_verts *= 2;
+
+ S32 new_tc_size = ((new_verts*8)+0xF) & ~0xF;
+ S32 old_tc_size = ((mNumVertices*8)+0xF) & ~0xF;
+
+ S32 old_vsize = mNumVertices*16;
+
+ S32 new_size = new_verts*16*2+new_tc_size;
+
+ LLVector4a* old_buf = mPositions;
+
+ mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64);
+ mNormals = mPositions+new_verts;
+ mTexCoords = (LLVector2*) (mNormals+new_verts);
+
+ //positions
+ LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize);
+
+ //normals
+ LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize);
- //normals
- mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size);
- ll_assert_aligned(mNormals,16);
-
- //tex coords
- new_size = ((new_verts*8)+0xF) & ~0xF;
- old_size = ((mNumVertices*8)+0xF) & ~0xF;
- mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size);
- ll_assert_aligned(mTexCoords,16);
+ //tex coords
+ LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tc_size);
+ //just clear binormals
+ ll_aligned_free_16(mBinormals);
- //just clear binormals
- ll_aligned_free_16(mBinormals);
- mBinormals = NULL;
+ ll_aligned_free(old_buf);
+
+ mNumAllocatedVertices = new_verts;
+
+ mBinormals = NULL;
+ }
mPositions[mNumVertices] = pos;
mNormals[mNumVertices] = norm;
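
The pushVertex() rewrite switches from reallocating all three streams on every push to classic amortized doubling: the new mNumAllocatedVertices member tracks capacity separately from mNumVertices, and when a push would exceed it, a roughly double-sized combined block is allocated, the old streams are copied across, and the old block is freed. A condensed, self-contained sketch of the capacity logic for a single stream (names hypothetical):

#include <cstdlib>
#include <cstring>

struct GrowableStream
{
    float* data     = nullptr;
    int    count    = 0;   // vertices in use      (mNumVertices)
    int    capacity = 0;   // vertices allocated   (mNumAllocatedVertices)

    void push(float value)
    {
        if (count + 1 > capacity)
        {
            int new_capacity = (count + 1) * 2;   // double on expansion
            float* new_data  = static_cast<float*>(
                std::malloc(new_capacity * sizeof(float)));
            if (data)
            {
                std::memcpy(new_data, data, count * sizeof(float));  // keep old contents
                std::free(data);                                     // release old block
            }
            data     = new_data;
            capacity = new_capacity;
        }
        data[count++] = value;
    }
};

Growing geometrically keeps the per-push cost amortized constant, at the price of up to twice the strictly necessary memory; the removed ll_aligned_realloc_16 calls grew each stream by exactly one vertex per push.
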
@@ -6810,13 +6822,23 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
llerrs << "Cannot append empty face." << llendl;
}
+ U32 old_vsize = mNumVertices*16;
+ U32 new_vsize = new_count * 16;
+ U32 old_tcsize = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF;
+ U32 new_tcsize = (new_count*sizeof(LLVector2)+0xF) & ~0xF;
+ U32 new_size = new_vsize * 2 + new_tcsize;
+
//allocate new buffer space
- mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a));
- ll_assert_aligned(mPositions, 16);
- mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a));
- ll_assert_aligned(mNormals, 16);
- mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF);
- ll_assert_aligned(mTexCoords, 16);
+ LLVector4a* old_buf = mPositions;
+ mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64);
+ mNormals = mPositions + new_count;
+ mTexCoords = (LLVector2*) (mNormals+new_count);
+
+ mNumAllocatedVertices = new_count;
+
+ LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize);
+ LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize);
+ LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tcsize);
mNumVertices = new_count;
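
appendFace() follows the same packed-buffer scheme: because the normal and texcoord streams start at offsets determined by the vertex count, growing the buffer means allocating a block sized for new_count, recomputing the interior pointers from the new base, and copying each old stream from its old offset before freeing the old block. A sketch of that relocation step for a hypothetical buffer holding two equal-sized streams back to back:

#include <cstdlib>
#include <cstring>

// Two float streams packed back to back in one allocation. Growing the buffer
// moves where the second stream starts, so the interior pointer must be
// recomputed from the new base before the old contents are copied over.
void grow_packed(float*& base, float*& stream_b, int old_count, int new_count)
{
    float* old_base = base;

    base     = static_cast<float*>(std::malloc(2 * new_count * sizeof(float)));
    stream_b = base + new_count;   // second stream follows the first

    if (old_base)
    {
        std::memcpy(base,     old_base,             old_count * sizeof(float));
        std::memcpy(stream_b, old_base + old_count, old_count * sizeof(float));
        std::free(old_base);
    }
}
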
@@ -6912,12 +6934,15 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
LLVector4a* pos = (LLVector4a*) mPositions;
LLVector4a* norm = (LLVector4a*) mNormals;
LLVector2* tc = (LLVector2*) mTexCoords;
- S32 begin_stex = llfloor( profile[mBeginS].mV[2] );
+ F32 begin_stex = floorf(profile[mBeginS].mV[2]);
S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS;
S32 cur_vertex = 0;
+ S32 end_t = mBeginT+mNumT;
+ bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2;
+
// Copy the vertices into the array
- for (t = mBeginT; t < mBeginT + mNumT; t++)
+ for (t = mBeginT; t < end_t; t++)
{
tt = path_data[t].mTexT;
for (s = 0; s < num_s; s++)
@@ -6968,9 +6993,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
norm[cur_vertex].clear();
cur_vertex++;
- if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0)
+ if (test && s > 0)
{
-
pos[cur_vertex].load3(mesh[i].mPos.mV);
tc[cur_vertex] = LLVector2(ss,tt);
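
These two createSide() hunks hoist loop-invariant work out of the vertex copy loop: the bound mBeginT+mNumT is computed once as end_t, and the repeated (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 check is evaluated once into the bool test. A small illustration of the pattern with hypothetical names:

#include <cstddef>
#include <vector>

// Evaluate the invariant part of the condition once, as the patch does with
// `test` and `end_t`, rather than re-checking it on every iteration.
int count_inner_flat(const std::vector<int>& items,
                     unsigned type_mask, unsigned inner_mask, unsigned flat_mask,
                     int num_s)
{
    const bool test = (type_mask & inner_mask) && (type_mask & flat_mask) && num_s > 2;
    const std::size_t end = items.size();

    int n = 0;
    for (std::size_t i = 0; i < end; ++i)
    {
        if (test && items[i] > 0)
        {
            ++n;
        }
    }
    return n;
}
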
@@ -7085,30 +7109,38 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
}
//generate normals
- for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle
+ U32 count = mNumIndices/3;
+
+ for (U32 i = 0; i < count; i++) //for each triangle
{
const U16* idx = &(mIndices[i*3]);
-
- LLVector4a* v[] =
- { pos+idx[0], pos+idx[1], pos+idx[2] };
+ LLVector4a& v0 = *(pos+idx[0]);
+ LLVector4a& v1 = *(pos+idx[1]);
+ LLVector4a& v2 = *(pos+idx[2]);
- LLVector4a* n[] =
- { norm+idx[0], norm+idx[1], norm+idx[2] };
+ LLVector4a& n0 = *(norm+idx[0]);
+ LLVector4a& n1 = *(norm+idx[1]);
+ LLVector4a& n2 = *(norm+idx[2]);
//calculate triangle normal
LLVector4a a, b, c;
- a.setSub(*v[0], *v[1]);
- b.setSub(*v[0], *v[2]);
+ a.setSub(v0, v1);
+ b.setSub(v0, v2);
c.setCross3(a,b);
- n[0]->add(c);
- n[1]->add(c);
- n[2]->add(c);
+ n0.add(c);
+ n1.add(c);
+ n2.add(c);
//even out quad contributions
- n[i%2+1]->add(c);
+ switch (i%2+1)
+ {
+ case 0: n0.add(c); break;
+ case 1: n1.add(c); break;
+ case 2: n2.add(c); break;
+ };
}
// adjust normals based on wrapping and stitching
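
The normal-generation loop accumulates each triangle's face normal (the cross product of two edges) into its three vertex normals; the extra add driven by i%2+1, which only ever evaluates to 1 or 2, evens out contributions across the two triangles of each quad. The rewrite binds the vertices and normals to references instead of building per-triangle pointer arrays. A self-contained sketch of the accumulation with plain 3-vectors (the viewer uses LLVector4a's setSub/setCross3/add):

#include <cstddef>
#include <cstdint>
#include <vector>

struct Vec3 { float x, y, z; };

static Vec3 sub(const Vec3& a, const Vec3& b) { return { a.x - b.x, a.y - b.y, a.z - b.z }; }
static void add(Vec3& a, const Vec3& b)       { a.x += b.x; a.y += b.y; a.z += b.z; }

static Vec3 cross(const Vec3& a, const Vec3& b)
{
    return { a.y * b.z - a.z * b.y,
             a.z * b.x - a.x * b.z,
             a.x * b.y - a.y * b.x };
}

// norm must be pre-sized to pos.size() and zero-initialized, e.g.
// std::vector<Vec3> norm(pos.size());
void accumulate_normals(const std::vector<Vec3>& pos,
                        const std::vector<std::uint16_t>& indices,
                        std::vector<Vec3>& norm)
{
    const std::size_t tri_count = indices.size() / 3;
    for (std::size_t i = 0; i < tri_count; ++i)
    {
        const std::uint16_t* idx = &indices[i * 3];
        const Vec3& v0 = pos[idx[0]];
        const Vec3& v1 = pos[idx[1]];
        const Vec3& v2 = pos[idx[2]];

        Vec3 c = cross(sub(v0, v1), sub(v0, v2));  // triangle normal

        add(norm[idx[0]], c);
        add(norm[idx[1]], c);
        add(norm[idx[2]], c);

        // Even out quad contributions: the extra share alternates between the
        // second and third vertex of the triangle (i%2+1 is 1 or 2).
        if (i % 2 == 0) { add(norm[idx[1]], c); }
        else            { add(norm[idx[2]], c); }
    }
}
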
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 99158c1c44..1d3b0fe52f 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -912,6 +912,7 @@ public:
LLVector2 mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face.
S32 mNumVertices;
+ S32 mNumAllocatedVertices;
S32 mNumIndices;
LLVector4a* mPositions;