From f8e059deee28500b88c8c172eaa8c4d7ca657748 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Fri, 8 Mar 2013 17:11:30 -0600
Subject: MAINT-2371 Last round of optimizations.

Reviewed by Graham
---
 indra/llmath/llmatrix4a.h          |   11 +-
 indra/llmath/llvector4a.inl        |   13 +-
 indra/llmath/llvolume.cpp          | 1791 +++++++++++++-----------------------
 indra/llmath/llvolume.h            |   79 +-
 indra/newview/llflexibleobject.cpp |   18 +-
 indra/newview/llspatialpartition.h |    2 +-
 indra/newview/llvovolume.cpp       |  157 ++--
 7 files changed, 824 insertions(+), 1247 deletions(-)

(limited to 'indra')

diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index c4cefdb4fa..d141298f69 100644
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -107,15 +107,14 @@ public:
 
 	inline void rotate(const LLVector4a& v, LLVector4a& res)
 	{
+		LLVector4a y,z;
+
 		res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
-		res.mul(mMatrix[0]);
-		
-		LLVector4a y;
 		y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
-		y.mul(mMatrix[1]);
-
-		LLVector4a z;
 		z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
+		
+		res.mul(mMatrix[0]);
+		y.mul(mMatrix[1]);
 		z.mul(mMatrix[2]);
 
 		res.add(y);
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 7c52ffef21..35a67204ec 100644
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -460,16 +460,13 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs)
 	mQ = _mm_max_ps(lhs.mQ, rhs.mQ);
 }
 
-// Set this to  (c * lhs) + rhs * ( 1 - c)
+// Set this to lhs + (rhs-lhs)*c, i.e. lhs when c == 0 and rhs when c == 1
 inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c)
 {
-	LLVector4a a = lhs;
-	a.mul(c);
-	
-	LLVector4a b = rhs;
-	b.mul(1.f-c);
-	
-	setAdd(a, b);
+	LLVector4a t;
+	t.setSub(rhs,lhs);
+	t.mul(c);
+	setAdd(lhs, t);
 }
 
 inline LLBool32 LLVector4a::isFinite3() const
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index f989e8ed17..9fc72fd801 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -94,6 +94,95 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1;
 
 extern BOOL gDebugGL;
 
+bool less_than_max_mag(const LLVector4a& vec);
+
+template <class T, U32 alignment>
+LLAlignedArray<T, alignment>::LLAlignedArray()
+{
+	// a freshly constructed array owns no buffer and holds no elements
+	mElementCount = mCapacity = 0;
+	mArray = NULL;
+}
+
+template <class T, U32 alignment>
+LLAlignedArray<T, alignment>::~LLAlignedArray()
+{
+	// hand the aligned buffer back, then leave the members in the empty state
+	ll_aligned_free(mArray);
+	mArray = NULL;
+	mElementCount = mCapacity = 0;
+}
+
+template <class T, U32 alignment>
+void LLAlignedArray<T, alignment>::push_back(const T& elem)
+{ // appends a copy of elem, moving to a larger buffer first when the current one is full
+	T* old_buf = NULL;
+	if (mCapacity <= mElementCount)
+	{
+		mCapacity++; // +1 before doubling so that a capacity of 0 still grows
+		mCapacity *= 2;
+		T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); // NOTE(review): result is not checked for NULL
+		if (mArray)
+		{
+			LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount);
+		}
+		old_buf = mArray; // keep the old buffer alive until elem has been copied below
+		mArray = new_buf;
+	}
+
+	mArray[mElementCount++] = elem;
+
+	//delete old array here to prevent error on a.push_back(a[0])
+	ll_aligned_free(old_buf); // old_buf is still NULL when no reallocation happened
+}
+
+template <class T, U32 alignment>
+void LLAlignedArray<T, alignment>::resize(U32 size)
+{ // sets the element count to size; reallocates (carrying over the old elements) only when capacity is too small
+	if (mCapacity < size)
+	{
+		mCapacity = size+mCapacity*2; // over-allocate so repeated growth does not reallocate every time
+		T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL;
+		if (mArray && mElementCount > 0)
+		{ // an emptied array (e.g. after resize(0)) has nothing to carry over -- never request a zero-byte copy
+			LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount);
+		}
+		ll_aligned_free(mArray); // freed unconditionally, as the destructor and push_back already do
+
+		/*for (U32 i = mElementCount; i < mCapacity; ++i)
+		{
+			new(new_buf+i) T();
+		}*/
+		mArray = new_buf;
+	}
+
+	mElementCount = size; // slots in [old count, size) are not constructed here (see the disabled loop above)
+}
+
+
+template <class T, U32 alignment>
+T& LLAlignedArray<T, alignment>::operator[](int idx)
+{ // mutable element access; the upper bound is checked through llassert and nothing else
+	llassert(idx < mElementCount);
+	return mArray[idx];
+}
+
+template <class T, U32 alignment>
+const T& LLAlignedArray<T, alignment>::operator[](int idx) const
+{ // read-only element access; the upper bound is checked through llassert and nothing else
+	llassert(idx < mElementCount);
+	return mArray[idx];
+}
+
+template <class T, U32 alignment>
+T* LLAlignedArray<T, alignment>::append(S32 N)
+{ // grows the array by N elements and returns the address of the first newly added slot
+	const U32 old_count = size();
+	resize(old_count+N);
+	return &operator[](old_count);
+}
+
+
 BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
 {    
 	LLVector3 test = (pt2-pt1)%(pt3-pt2);
@@ -474,7 +563,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
 	const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f };
 	F32 scale = 0.5f;
 	F32 t, t_step, t_first, t_fraction, ang, ang_step;
-	LLVector3 pt1,pt2;
+	LLVector4a pt1,pt2;
 
 	F32 begin  = params.getBegin();
 	F32 end    = params.getEnd();
@@ -497,20 +586,21 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
 	// Starting t and ang values for the first face
 	t = t_first;
 	ang = 2.0f*F_PI*(t*ang_scale + offset);
-	pt1.setVec(cos(ang)*scale,sin(ang)*scale, t);
+	pt1.set(cos(ang)*scale,sin(ang)*scale, t);
 
 	// Increment to the next point.
 	// pt2 is the end point on the fractional face
 	t += t_step;
 	ang += ang_step;
-	pt2.setVec(cos(ang)*scale,sin(ang)*scale,t);
+	pt2.set(cos(ang)*scale,sin(ang)*scale,t);
 
 	t_fraction = (begin - t_first)*sides;
 
 	// Only use if it's not almost exactly on an edge.
 	if (t_fraction < 0.9999f)
 	{
-		LLVector3 new_pt = lerp(pt1, pt2, t_fraction);
+		LLVector4a new_pt;
+		new_pt.setLerp(pt1, pt2, t_fraction);
 		mProfile.push_back(new_pt);
 	}
 
@@ -518,12 +608,17 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
 	while (t < end)
 	{
 		// Iterate through all the integer steps of t.
-		pt1.setVec(cos(ang)*scale,sin(ang)*scale,t);
+		pt1.set(cos(ang)*scale,sin(ang)*scale,t);
 
 		if (mProfile.size() > 0) {
-			LLVector3 p = mProfile[mProfile.size()-1];
+			LLVector4a p = mProfile[mProfile.size()-1];
 			for (S32 i = 0; i < split && mProfile.size() > 0; i++) {
-				mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1));
+				//mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1));
+				LLVector4a new_pt;
+				new_pt.setSub(pt1, p);
+				new_pt.mul(1.0f/(float)(split+1) * (float)(i+1));
+				new_pt.add(p);
+				mProfile.push_back(new_pt);
 			}
 		}
 		mProfile.push_back(pt1);
@@ -536,18 +631,25 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
 
 	// pt1 is the first point on the fractional face
 	// pt2 is the end point on the fractional face
-	pt2.setVec(cos(ang)*scale,sin(ang)*scale,t);
+	pt2.set(cos(ang)*scale,sin(ang)*scale,t);
 
 	// Find the fraction that we need to add to the end point.
 	t_fraction = (end - (t - t_step))*sides;
 	if (t_fraction > 0.0001f)
 	{
-		LLVector3 new_pt = lerp(pt1, pt2, t_fraction);
+		LLVector4a new_pt;
+		new_pt.setLerp(pt1, pt2, t_fraction);
 		
 		if (mProfile.size() > 0) {
-			LLVector3 p = mProfile[mProfile.size()-1];
+			LLVector4a p = mProfile[mProfile.size()-1];
 			for (S32 i = 0; i < split && mProfile.size() > 0; i++) {
-				mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1));
+				//mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1));
+
+				LLVector4a pt1;
+				pt1.setSub(new_pt, p);
+				pt1.mul(1.0f/(float)(split+1) * (float)(i+1));
+				pt1.add(p);
+				mProfile.push_back(pt1);
 			}
 		}
 		mProfile.push_back(new_pt);
@@ -568,7 +670,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
 		if (params.getHollow() <= 0)
 		{
 			// put center point if not hollow.
-			mProfile.push_back(LLVector3(0,0,0));
+			mProfile.push_back(LLVector4a(0,0,0));
 		}
 	}
 	else
@@ -581,103 +683,6 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
 	mTotal = mProfile.size();
 }
 
-void LLProfile::genNormals(const LLProfileParams& params)
-{
-	S32 count = mProfile.size();
-
-	S32 outer_count;
-	if (mTotalOut)
-	{
-		outer_count = mTotalOut;
-	}
-	else
-	{
-		outer_count = mTotal / 2;
-	}
-
-	mEdgeNormals.resize(count * 2);
-	mEdgeCenters.resize(count * 2);
-	mNormals.resize(count);
-
-	LLVector2 pt0,pt1;
-
-	BOOL hollow = (params.getHollow() > 0);
-
-	S32 i0, i1, i2, i3, i4;
-
-	// Parametrically generate normal
-	for (i2 = 0; i2 < count; i2++)
-	{
-		mNormals[i2].mV[0] = mProfile[i2].mV[0];
-		mNormals[i2].mV[1] = mProfile[i2].mV[1];
-		if (hollow && (i2 >= outer_count))
-		{
-			mNormals[i2] *= -1.f;
-		}
-		if (mNormals[i2].magVec() < 0.001)
-		{
-			// Special case for point at center, get adjacent points.
-			i1 = (i2 - 1) >= 0 ? i2 - 1 : count - 1;
-			i0 = (i1 - 1) >= 0 ? i1 - 1 : count - 1;
-			i3 = (i2 + 1) < count ? i2 + 1 : 0;
-			i4 = (i3 + 1) < count ? i3 + 1 : 0;
-
-			pt0.setVec(mProfile[i1].mV[VX] + mProfile[i1].mV[VX] - mProfile[i0].mV[VX], 
-				mProfile[i1].mV[VY] + mProfile[i1].mV[VY] - mProfile[i0].mV[VY]);
-			pt1.setVec(mProfile[i3].mV[VX] + mProfile[i3].mV[VX] - mProfile[i4].mV[VX], 
-				mProfile[i3].mV[VY] + mProfile[i3].mV[VY] - mProfile[i4].mV[VY]);
-
-			mNormals[i2] = pt0 + pt1;
-			mNormals[i2] *= 0.5f;
-		}
-		mNormals[i2].normVec();
-	}
-
-	S32 num_normal_sets = isConcave() ? 2 : 1;
-	for (S32 normal_set = 0; normal_set < num_normal_sets; normal_set++)
-	{
-		S32 point_num;
-		for (point_num = 0; point_num < mTotal; point_num++)
-		{
-			LLVector3 point_1 = mProfile[point_num];
-			point_1.mV[VZ] = 0.f;
-
-			LLVector3 point_2;
-			
-			if (isConcave() && normal_set == 0 && point_num == (mTotal - 1) / 2)
-			{
-				point_2 = mProfile[mTotal - 1];
-			}
-			else if (isConcave() && normal_set == 1 && point_num == mTotal - 1)
-			{
-				point_2 = mProfile[(mTotal - 1) / 2];
-			}
-			else
-			{
-				LLVector3 delta_pos;
-				S32 neighbor_point = (point_num + 1) % mTotal;
-				while(delta_pos.magVecSquared() < 0.01f * 0.01f)
-				{
-					point_2 = mProfile[neighbor_point];
-					delta_pos = point_2 - point_1;
-					neighbor_point = (neighbor_point + 1) % mTotal;
-					if (neighbor_point == point_num)
-					{
-						break;
-					}
-				}
-			}
-
-			point_2.mV[VZ] = 0.f;
-			LLVector3 face_normal = (point_2 - point_1) % LLVector3::z_axis;
-			face_normal.normVec();
-			mEdgeNormals[normal_set * count + point_num] = face_normal;
-			mEdgeCenters[normal_set * count + point_num] = lerp(point_1, point_2, 0.5f);
-		}
-	}
-}
-
-
 // Hollow is percent of the original bounding box, not of this particular
 // profile's geometry.  Thus, a swept triangle needs lower hollow values than
 // a swept square.
@@ -693,12 +698,13 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3
 
 	Face *face = addFace(mTotalOut, mTotal-mTotalOut,0,LL_FACE_INNER_SIDE, flat);
 
-	std::vector<LLVector3> pt;
+	static LLAlignedArray<LLVector4a,64> pt;
 	pt.resize(mTotal) ;
 
 	for (S32 i=mTotalOut;i<mTotal;i++)
 	{
-		pt[i] = mProfile[i] * box_hollow;
+		pt[i] = mProfile[i];
+		pt[i].mul(box_hollow);
 	}
 
 	S32 j=mTotal-1;
@@ -844,8 +850,8 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai
 		detail = MIN_LOD;
 	}
 
-	mProfile.clear();
-	mFaces.clear();
+	mProfile.resize(0);
+	mFaces.resize(0);
 
 	// Generate the face data
 	S32 i;
@@ -877,10 +883,12 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai
 				addFace((face_num++) * (split +1), split+2, 1, LL_FACE_OUTER_SIDE_0 << i, TRUE);
 			}
 
+			LLVector4a scale(1,1,4,1);
+
 			for (i = 0; i <(S32) mProfile.size(); i++)
 			{
 				// Scale by 4 to generate proper tex coords.
-				mProfile[i].mV[2] *= 4.f;
+				mProfile[i].mul(scale);
 			}
 
 			if (hollow)
@@ -913,10 +921,11 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai
 	case  LL_PCODE_PROFILE_EQUALTRI:
 		{
 			genNGon(params, 3,0, 0, 1, split);
+			LLVector4a scale(1,1,3,1);
 			for (i = 0; i <(S32) mProfile.size(); i++)
 			{
 				// Scale by 3 to generate proper tex coords.
-				mProfile[i].mV[2] *= 3.f;
+				mProfile[i].mul(scale);
 			}
 
 			if (path_open)
@@ -1094,8 +1103,6 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai
 			addFace(mTotal-2, 2,0.5,LL_FACE_PROFILE_END, TRUE);
 		}
 	}
-	
-	//genNormals(params);
 
 	return TRUE;
 }
@@ -1379,25 +1386,29 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en
 	// the path begins at the correct cut.
 	F32 step= 1.0f / sides;
 	F32 t	= params.getBegin();
-	pt		= vector_append(mPath, 1);
+	pt		= mPath.append(1);
 	ang		= 2.0f*F_PI*revolutions * t;
 	s		= sin(ang)*lerp(radius_start, radius_end, t);	
 	c		= cos(ang)*lerp(radius_start, radius_end, t);
 
 
-	pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s)
+	pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s)
 					  + lerp(-skew ,skew, t) * 0.5f,
 					c + lerp(0,params.getShear().mV[1],s), 
 					s);
-	pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t);
-	pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t);
+	pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t),
+		hole_y * lerp(taper_y_begin, taper_y_end, t),
+		0,1);
 	pt->mTexT  = t;
 	
 	// Twist rotates the path along the x,y plane (I think) - DJS 04/05/02
 	twist.setQuat  (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1);
 	// Rotate the point around the circle's center.
 	qang.setQuat   (ang,path_axis);
-	pt->mRot   = twist * qang;
+
+	LLMatrix3 rot(twist * qang);
+
+	pt->mRot.loadu(rot);
 
 	t+=step;
 
@@ -1408,51 +1419,55 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en
 	// Run through the non-cut dependent points.
 	while (t < params.getEnd())
 	{
-		pt		= vector_append(mPath, 1);
+		pt		= mPath.append(1);
 
 		ang = 2.0f*F_PI*revolutions * t;
 		c   = cos(ang)*lerp(radius_start, radius_end, t);
 		s   = sin(ang)*lerp(radius_start, radius_end, t);
 
-		pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s)
+		pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s)
 					      + lerp(-skew ,skew, t) * 0.5f,
 						c + lerp(0,params.getShear().mV[1],s), 
 						s);
 
-		pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t);
-		pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t);
+		pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t),
+					hole_y * lerp(taper_y_begin, taper_y_end, t),
+					0,1);
 		pt->mTexT  = t;
 
 		// Twist rotates the path along the x,y plane (I think) - DJS 04/05/02
 		twist.setQuat  (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1);
 		// Rotate the point around the circle's center.
 		qang.setQuat   (ang,path_axis);
-		pt->mRot	= twist * qang;
-
+		LLMatrix3 tmp(twist*qang);
+		pt->mRot.loadu(tmp);
+		
 		t+=step;
 	}
 
 	// Make one final pass for the end cut.
 	t = params.getEnd();
-	pt		= vector_append(mPath, 1);
+	pt		= mPath.append(1);
 	ang = 2.0f*F_PI*revolutions * t;
 	c   = cos(ang)*lerp(radius_start, radius_end, t);
 	s   = sin(ang)*lerp(radius_start, radius_end, t);
 
-	pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s)
+	pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s)
 					  + lerp(-skew ,skew, t) * 0.5f,
 					c + lerp(0,params.getShear().mV[1],s), 
 					s);
-	pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t);
-	pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t);
+	pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t),
+				   hole_y * lerp(taper_y_begin, taper_y_end, t),
+				   0,1);
 	pt->mTexT  = t;
 	
 	// Twist rotates the path along the x,y plane (I think) - DJS 04/05/02
 	twist.setQuat  (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1);
 	// Rotate the point around the circle's center.
 	qang.setQuat   (ang,path_axis);
-	pt->mRot   = twist * qang;
-
+	LLMatrix3 tmp(twist*qang);
+	pt->mRot.loadu(tmp);
+	
 	mTotal = mPath.size();
 }
 
@@ -1549,7 +1564,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
 	mDirty = FALSE;
 	S32 np = 2; // hardcode for line
 
-	mPath.clear();
+	mPath.resize(0);
 	mOpen = TRUE;
 
 	// Is this 0xf0 mask really necessary?  DK 03/02/05
@@ -1575,12 +1590,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
 			for (S32 i=0;i<np;i++)
 			{
 				F32 t = lerp(params.getBegin(),params.getEnd(),(F32)i * mStep);
-				mPath[i].mPos.setVec(lerp(0,params.getShear().mV[0],t),
+				mPath[i].mPos.set(lerp(0,params.getShear().mV[0],t),
 									 lerp(0,params.getShear().mV[1],t),
 									 t - 0.5f);
-				mPath[i].mRot.setQuat(lerp(F_PI * params.getTwistBegin(),F_PI * params.getTwist(),t),0,0,1);
-				mPath[i].mScale.mV[0] = lerp(start_scale.mV[0],end_scale.mV[0],t);
-				mPath[i].mScale.mV[1] = lerp(start_scale.mV[1],end_scale.mV[1],t);
+				LLQuaternion quat;
+				quat.setQuat(lerp(F_PI * params.getTwistBegin(),F_PI * params.getTwist(),t),0,0,1);
+				LLMatrix3 tmp(quat);
+				mPath[i].mRot.loadu(tmp);
+				mPath[i].mScale.set(lerp(start_scale.mV[0],end_scale.mV[0],t),
+									lerp(start_scale.mV[1],end_scale.mV[1],t),
+									0,1);
 				mPath[i].mTexT        = t;
 			}
 		}
@@ -1617,7 +1636,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
 			F32 toggle = 0.5f;
 			for (S32 i=0;i<(S32)mPath.size();i++)
 			{
-				mPath[i].mPos.mV[0] = toggle;
+				mPath[i].mPos.getF32ptr()[0] = toggle;
 				if (toggle == 0.5f)
 					toggle = -0.5f;
 				else
@@ -1638,13 +1657,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
 		for (S32 i=0;i<np;i++)
 		{
 			F32 t = (F32)i * mStep;
-			mPath[i].mPos.setVec(0,
+			mPath[i].mPos.set(0,
 								lerp(0,   -sin(F_PI*params.getTwist()*t)*0.5f,t),
 								lerp(-0.5, cos(F_PI*params.getTwist()*t)*0.5f,t));
-			mPath[i].mScale.mV[0] = lerp(1,params.getScale().mV[0],t);
-			mPath[i].mScale.mV[1] = lerp(1,params.getScale().mV[1],t);
+			mPath[i].mScale.set(lerp(1,params.getScale().mV[0],t),
+								lerp(1,params.getScale().mV[1],t), 0,1);
 			mPath[i].mTexT  = t;
-			mPath[i].mRot.setQuat(F_PI * params.getTwist() * t,1,0,0);
+			LLQuaternion quat;
+			quat.setQuat(F_PI * params.getTwist() * t,1,0,0);
+			LLMatrix3 tmp(quat);
+			mPath[i].mRot.loadu(tmp);
 		}
 
 		break;
@@ -1668,11 +1690,15 @@ BOOL LLDynamicPath::generate(const LLPathParams& params, F32 detail, S32 split,
 		// Path hasn't been generated yet.
 		// Some algorithms later assume at least TWO path points.
 		resizePath(2);
+		LLQuaternion quat;
+		quat.setQuat(0,0,0);
+		LLMatrix3 tmp(quat);
+
 		for (U32 i = 0; i < 2; i++)
 		{
-			mPath[i].mPos.setVec(0, 0, 0);
-			mPath[i].mRot.setQuat(0, 0, 0);
-			mPath[i].mScale.setVec(1, 1);
+			mPath[i].mPos.set(0, 0, 0);
+			mPath[i].mRot.loadu(tmp);
+			mPath[i].mScale.set(1, 1, 0, 1);
 			mPath[i].mTexT = 0;
 		}
 	}
@@ -2045,7 +2071,7 @@ LLVolume::LLVolume(const LLVolumeParams &params, const F32 detail, const BOOL ge
 	mHullIndices = NULL;
 	mNumHullPoints = 0;
 	mNumHullIndices = 0;
-
+	
 	// set defaults
 	if (mParams.getPathParams().getCurveType() == LL_PCODE_PATH_FLEXIBLE)
 	{
@@ -2105,6 +2131,7 @@ LLVolume::~LLVolume()
 
 BOOL LLVolume::generate()
 {
+	LL_CHECK_MEMORY
 	llassert_always(mProfilep);
 	
 	//Added 10.03.05 Dave Parks
@@ -2141,20 +2168,6 @@ BOOL LLVolume::generate()
 		mLODScaleBias.setVec(0.6f, 0.6f, 0.6f);
 	}
 	
-	//********************************************************************
-	//debug info, to be removed
-	if((U32)(mPathp->mPath.size() * mProfilep->mProfile.size()) > (1u << 20))
-	{
-		llinfos << "sizeS: " << mPathp->mPath.size() << " sizeT: " << mProfilep->mProfile.size() << llendl ;
-		llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ;
-		llinfos << mParams << llendl ;
-		llinfos << "more info to check if mProfilep is deleted or not." << llendl ;
-		llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ;
-
-		llerrs << "LLVolume corrupted!" << llendl ;
-	}
-	//********************************************************************
-
 	BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split);
 	BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split);
 
@@ -2163,21 +2176,6 @@ BOOL LLVolume::generate()
 		S32 sizeS = mPathp->mPath.size();
 		S32 sizeT = mProfilep->mProfile.size();
 
-		//********************************************************************
-		//debug info, to be removed
-		if((U32)(sizeS * sizeT) > (1u << 20))
-		{
-			llinfos << "regenPath: " << (S32)regenPath << " regenProf: " << (S32)regenProf << llendl ;
-			llinfos << "sizeS: " << sizeS << " sizeT: " << sizeT << llendl ;
-			llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ;
-			llinfos << mParams << llendl ;
-			llinfos << "more info to check if mProfilep is deleted or not." << llendl ;
-			llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ;
-
-			llerrs << "LLVolume corrupted!" << llendl ;
-		}
-		//********************************************************************
-
 		sNumMeshPoints -= mMesh.size();
 		mMesh.resize(sizeT * sizeS);
 		sNumMeshPoints += mMesh.size();		
@@ -2185,22 +2183,39 @@ BOOL LLVolume::generate()
 		//generate vertex positions
 
 		// Run along the path.
+		LLVector4a* dst = mMesh.mArray;
+
 		for (S32 s = 0; s < sizeS; ++s)
 		{
-			LLVector2  scale = mPathp->mPath[s].mScale;
-			LLQuaternion rot = mPathp->mPath[s].mRot;
+			F32* scale = mPathp->mPath[s].mScale.getF32ptr();
+			
+			F32 sc [] = 
+			{ scale[0], 0, 0, 0,
+				0, scale[1], 0, 0,
+				0, 0, scale[2], 0,
+					0, 0, 0, 1 };
+			
+			LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix);
+			LLMatrix4 scale_mat(sc);
+			
+			scale_mat *= rot;
+			
+			LLMatrix4a rot_mat;
+			rot_mat.loadu(scale_mat);
+			
+			LLVector4a* profile = mProfilep->mProfile.mArray;
+			LLVector4a* end_profile = profile+sizeT;
+			LLVector4a offset = mPathp->mPath[s].mPos;
+
+			LLVector4a tmp;
 
 			// Run along the profile.
-			for (S32 t = 0; t < sizeT; ++t)
+			while (profile < end_profile)
 			{
-				S32 m = s*sizeT + t;
-				Point& pt = mMesh[m];
-				
-				pt.mPos.mV[0] = mProfilep->mProfile[t].mV[0] * scale.mV[0];
-				pt.mPos.mV[1] = mProfilep->mProfile[t].mV[1] * scale.mV[1];
-				pt.mPos.mV[2] = 0.0f;
-				pt.mPos       = pt.mPos * rot;
-				pt.mPos      += mPathp->mPath[s].mPos;
+				rot_mat.rotate(*profile++, tmp);
+				dst->setAdd(tmp,offset);
+				llassert(less_than_max_mag(*dst));
+				++dst;
 			}
 		}
 
@@ -2210,9 +2225,11 @@ BOOL LLVolume::generate()
 			LLFaceID id = iter->mFaceID;
 			mFaceMask |= id;
 		}
-		
+		LL_CHECK_MEMORY
 		return TRUE;
 	}
+
+	LL_CHECK_MEMORY
 	return FALSE;
 }
 
@@ -2790,14 +2807,16 @@ void LLVolume::createVolumeFaces()
 }
 
 
-inline LLVector3 sculpt_rgb_to_vector(U8 r, U8 g, U8 b)
+inline LLVector4a sculpt_rgb_to_vector(U8 r, U8 g, U8 b)
 {
 	// maps RGB values to vector values [0..255] -> [-0.5..0.5]
-	LLVector3 value;
-	value.mV[VX] = r / 255.f - 0.5f;
-	value.mV[VY] = g / 255.f - 0.5f;
-	value.mV[VZ] = b / 255.f - 0.5f;
+	LLVector4a value;
+	LLVector4a sub(0.5f, 0.5f, 0.5f);
 
+	value.set(r,g,b);
+	value.mul(1.f/255.f);
+	value.sub(sub);
+	
 	return value;
 }
 
@@ -2817,21 +2836,21 @@ inline U32 sculpt_st_to_index(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_w
 }
 
 
-inline LLVector3 sculpt_index_to_vector(U32 index, const U8* sculpt_data)
+inline LLVector4a sculpt_index_to_vector(U32 index, const U8* sculpt_data)
 {
-	LLVector3 v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]);
+	LLVector4a v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]);
 
 	return v;
 }
 
-inline LLVector3 sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data)
+inline LLVector4a sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data)
 {
 	U32 index = sculpt_st_to_index(s, t, size_s, size_t, sculpt_width, sculpt_height, sculpt_components);
 
 	return sculpt_index_to_vector(index, sculpt_data);
 }
 
-inline LLVector3 sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data)
+inline LLVector4a sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data)
 {
 	U32 index = sculpt_xy_to_index(x, y, sculpt_width, sculpt_height, sculpt_components);
 
@@ -2853,15 +2872,26 @@ F32 LLVolume::sculptGetSurfaceArea()
 		for (S32 t = 0; t < sizeT-1; t++)
 		{
 			// get four corners of quad
-			LLVector3 p1 = mMesh[(s  )*sizeT + (t  )].mPos;
-			LLVector3 p2 = mMesh[(s+1)*sizeT + (t  )].mPos;
-			LLVector3 p3 = mMesh[(s  )*sizeT + (t+1)].mPos;
-			LLVector3 p4 = mMesh[(s+1)*sizeT + (t+1)].mPos;
+			LLVector4a& p1 = mMesh[(s  )*sizeT + (t  )];
+			LLVector4a& p2 = mMesh[(s+1)*sizeT + (t  )];
+			LLVector4a& p3 = mMesh[(s  )*sizeT + (t+1)];
+			LLVector4a& p4 = mMesh[(s+1)*sizeT + (t+1)];
 
 			// compute the area of the quad by taking the length of the cross product of the two triangles
-			LLVector3 cross1 = (p1 - p2) % (p1 - p3);
-			LLVector3 cross2 = (p4 - p2) % (p4 - p3);
-			area += (cross1.magVec() + cross2.magVec()) / 2.f;
+			LLVector4a v0,v1,v2,v3;
+			v0.setSub(p1,p2);
+			v1.setSub(p1,p3);
+			v2.setSub(p4,p2);
+			v3.setSub(p4,p3);
+
+			LLVector4a cross1, cross2;
+			cross1.setCross3(v0,v1);
+			cross2.setCross3(v2,v3);
+
+			//LLVector3 cross1 = (p1 - p2) % (p1 - p3);
+			//LLVector3 cross2 = (p4 - p2) % (p4 - p3);
+			
+			area += (cross1.getLength3() + cross2.getLength3()).getF32() / 2.f;
 		}
 	}
 
@@ -2882,17 +2912,19 @@ void LLVolume::sculptGeneratePlaceholder()
 		for (S32 t = 0; t < sizeT; t++)
 		{
 			S32 i = t + line;
-			Point& pt = mMesh[i];
+			LLVector4a& pt = mMesh[i];
 
 			
 			F32 u = (F32)s/(sizeS-1);
 			F32 v = (F32)t/(sizeT-1);
 
 			const F32 RADIUS = (F32) 0.3;
-					
-			pt.mPos.mV[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS);
-			pt.mPos.mV[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS);
-			pt.mPos.mV[2] = (F32)(cos(F_PI * v) * RADIUS);
+			
+			F32* p = pt.getF32ptr();
+
+			p[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS);
+			p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS);
+			p[2] = (F32)(cos(F_PI * v) * RADIUS);
 
 		}
 		line += sizeT;
@@ -2917,7 +2949,7 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8
 		for (S32 t = 0; t < sizeT; t++)
 		{
 			S32 i = t + line;
-			Point& pt = mMesh[i];
+			LLVector4a& pt = mMesh[i];
 
 			S32 reversed_t = t;
 
@@ -2974,11 +3006,12 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8
 				}
 			}
 
-			pt.mPos = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data);
+			pt = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data);
 
 			if (sculpt_mirror)
 			{
-				pt.mPos.mV[VX] *= -1.f;
+				LLVector4a scale(-1.f,1,1,1);
+				pt.mul(scale);
 			}
 		}
 		
@@ -3560,803 +3593,125 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h
 	return true;
 }
 
-S32 *LLVolume::getTriangleIndices(U32 &num_indices) const
+void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts)
+{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the 
+	//supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost
+	F32 detail[] = {1.f, 1.5f, 2.5f, 4.f};	
+	for (S32 i = 0; i < 4; i++)
+	{
+		S32 count = 0;
+		S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]);
+		S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]);
+
+		count = (profile_points-1)*2*(path_points-1);
+		count += profile_points*2;
+
+		counts[i] = count;
+	}
+}
+
+
+S32 LLVolume::getNumTriangles(S32* vcount) const
 {
-	S32 expected_num_triangle_indices = getNumTriangleIndices();
-	if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES)
+	U32 triangle_count = 0;
+	U32 vertex_count = 0;
+
+	for (S32 i = 0; i < getNumVolumeFaces(); ++i)
 	{
-		// we don't allow LLVolumes with this many vertices
-		llwarns << "Couldn't allocate triangle indices" << llendl;
-		num_indices = 0;
-		return NULL;
+		const LLVolumeFace& face = getVolumeFace(i);
+		triangle_count += face.mNumIndices/3;
+
+		vertex_count += face.mNumVertices;
 	}
 
-	S32* index = new S32[expected_num_triangle_indices];
-	S32 count = 0;
 
-	// Let's do this totally diffently, as we don't care about faces...
-	// Counter-clockwise triangles are forward facing...
+	if (vcount)
+	{
+		*vcount = vertex_count;
+	}
+	
+	return triangle_count;
+}
+
 
-	BOOL open = getProfile().isOpen();
-	BOOL hollow = (mParams.getProfileParams().getHollow() > 0);
-	BOOL path_open = getPath().isOpen();
-	S32 size_s, size_s_out, size_t;
-	S32 s, t, i;
-	size_s = getProfile().getTotal();
-	size_s_out = getProfile().getTotalOut();
-	size_t = getPath().mPath.size();
+//-----------------------------------------------------------------------------
+// generateSilhouetteVertices()
+//-----------------------------------------------------------------------------
+void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
+										  std::vector<LLVector3> &normals,
+										  const LLVector3& obj_cam_vec_in,
+										  const LLMatrix4& mat_in,
+										  const LLMatrix3& norm_mat_in,
+										  S32 face_mask)
+{
+	LLMatrix4a mat;
+	mat.loadu(mat_in);
+
+	LLMatrix4a norm_mat;
+	norm_mat.loadu(norm_mat_in);
+		
+	LLVector4a obj_cam_vec;
+	obj_cam_vec.load3(obj_cam_vec_in.mV);
 
-	// NOTE -- if the construction of the triangles below ever changes
-	// then getNumTriangleIndices() method may also have to be updated.
+	vertices.clear();
+	normals.clear();
 
-	if (open)		/* Flawfinder: ignore */
+	if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH)
 	{
-		if (hollow)
+		return;
+	}
+	
+	S32 cur_index = 0;
+	//for each face
+	for (face_list_t::iterator iter = mVolumeFaces.begin();
+		 iter != mVolumeFaces.end(); ++iter)
+	{
+		LLVolumeFace& face = *iter;
+	
+		if (!(face_mask & (0x1 << cur_index++)) ||
+		     face.mNumIndices == 0 || face.mEdge.empty())
 		{
-			// Open hollow -- much like the closed solid, except we 
-			// we need to stitch up the gap between s=0 and s=size_s-1
+			continue;
+		}
 
-			for (t = 0; t < size_t - 1; t++)
-			{
-				// The outer face, first cut, and inner face
-				for (s = 0; s < size_s - 1; s++)
-				{
-					i  = s + t*size_s;
-					index[count++]  = i;				// x,y
-					index[count++]  = i + 1;			// x+1,y
-					index[count++]  = i + size_s;		// x,y+1
+		if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) {
 	
-					index[count++]  = i + size_s;		// x,y+1
-					index[count++]  = i + 1;			// x+1,y
-					index[count++]  = i + size_s + 1;	// x+1,y+1
-				}
+		}
+		else {
 
-				// The other cut face
-				index[count++]  = s + t*size_s;		// x,y
-				index[count++]  = 0 + t*size_s;		// x+1,y
-				index[count++]  = s + (t+1)*size_s;	// x,y+1
-	
-				index[count++]  = s + (t+1)*size_s;	// x,y+1
-				index[count++]  = 0 + t*size_s;		// x+1,y
-				index[count++]  = 0 + (t+1)*size_s;	// x+1,y+1
-			}
+			//==============================================
+			//DEBUG draw edge map instead of silhouette edge
+			//==============================================
 
-			// Do the top and bottom caps, if necessary
-			if (path_open)
-			{
-				// Top cap
-				S32 pt1 = 0;
-				S32 pt2 = size_s-1;
-				S32 i   = (size_t - 1)*size_s;
+#if DEBUG_SILHOUETTE_EDGE_MAP
 
-				while (pt2 - pt1 > 1)
-				{
-					// Use the profile points instead of the mesh, since you want
-					// the un-transformed profile distances.
-					LLVector3 p1 = getProfile().mProfile[pt1];
-					LLVector3 p2 = getProfile().mProfile[pt2];
-					LLVector3 pa = getProfile().mProfile[pt1+1];
-					LLVector3 pb = getProfile().mProfile[pt2-1];
-
-					p1.mV[VZ] = 0.f;
-					p2.mV[VZ] = 0.f;
-					pa.mV[VZ] = 0.f;
-					pb.mV[VZ] = 0.f;
-
-					// Use area of triangle to determine backfacing
-					F32 area_1a2, area_1ba, area_21b, area_2ab;
-					area_1a2 =  (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
-								(pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
-								(p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
-
-					area_1ba =  (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-								(pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
-								(pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
-
-					area_21b =  (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
-								(p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-								(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
-					area_2ab =  (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
-								(pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
-								(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
-					BOOL use_tri1a2 = TRUE;
-					BOOL tri_1a2 = TRUE;
-					BOOL tri_21b = TRUE;
-
-					if (area_1a2 < 0)
-					{
-						tri_1a2 = FALSE;
-					}
-					if (area_2ab < 0)
-					{
-						// Can't use, because it contains point b
-						tri_1a2 = FALSE;
-					}
-					if (area_21b < 0)
-					{
-						tri_21b = FALSE;
-					}
-					if (area_1ba < 0)
-					{
-						// Can't use, because it contains point b
-						tri_21b = FALSE;
-					}
+			//for each triangle
+			U32 count = face.mNumIndices;
+			for (U32 j = 0; j < count/3; j++) {
+				//get vertices
+				S32 v1 = face.mIndices[j*3+0];
+				S32 v2 = face.mIndices[j*3+1];
+				S32 v3 = face.mIndices[j*3+2];
 
-					if (!tri_1a2)
-					{
-						use_tri1a2 = FALSE;
-					}
-					else if (!tri_21b)
-					{
-						use_tri1a2 = TRUE;
-					}
-					else
-					{
-						LLVector3 d1 = p1 - pa;
-						LLVector3 d2 = p2 - pb;
+				//get current face center
+				LLVector3 cCenter = (face.mVertices[v1].getPosition() + 
+									face.mVertices[v2].getPosition() + 
+									face.mVertices[v3].getPosition()) / 3.0f;
 
-						if (d1.magVecSquared() < d2.magVecSquared())
-						{
-							use_tri1a2 = TRUE;
-						}
-						else
-						{
-							use_tri1a2 = FALSE;
-						}
+				//for each edge
+				for (S32 k = 0; k < 3; k++) {
+                    S32 nIndex = face.mEdge[j*3+k];
+					if (nIndex <= -1) {
+						continue;
 					}
 
-					if (use_tri1a2)
-					{
-						index[count++] = pt1 + i;
-						index[count++] = pt1 + 1 + i;
-						index[count++] = pt2 + i;
-						pt1++;
-					}
-					else
-					{
-						index[count++] = pt1 + i;
-						index[count++] = pt2 - 1 + i;
-						index[count++] = pt2 + i;
-						pt2--;
+					if (nIndex >= (S32) count/3) {
+						continue;
 					}
-				}
-
-				// Bottom cap
-				pt1          = 0;
-				pt2          = size_s-1;
-				while (pt2 - pt1 > 1)
-				{
-					// Use the profile points instead of the mesh, since you want
-					// the un-transformed profile distances.
-					LLVector3 p1 = getProfile().mProfile[pt1];
-					LLVector3 p2 = getProfile().mProfile[pt2];
-					LLVector3 pa = getProfile().mProfile[pt1+1];
-					LLVector3 pb = getProfile().mProfile[pt2-1];
-
-					p1.mV[VZ] = 0.f;
-					p2.mV[VZ] = 0.f;
-					pa.mV[VZ] = 0.f;
-					pb.mV[VZ] = 0.f;
-
-					// Use area of triangle to determine backfacing
-					F32 area_1a2, area_1ba, area_21b, area_2ab;
-					area_1a2 =  (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
-								(pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
-								(p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
-
-					area_1ba =  (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-								(pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
-								(pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
-
-					area_21b =  (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
-								(p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-								(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
-					area_2ab =  (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
-								(pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
-								(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
-					BOOL use_tri1a2 = TRUE;
-					BOOL tri_1a2 = TRUE;
-					BOOL tri_21b = TRUE;
-
-					if (area_1a2 < 0)
-					{
-						tri_1a2 = FALSE;
-					}
-					if (area_2ab < 0)
-					{
-						// Can't use, because it contains point b
-						tri_1a2 = FALSE;
-					}
-					if (area_21b < 0)
-					{
-						tri_21b = FALSE;
-					}
-					if (area_1ba < 0)
-					{
-						// Can't use, because it contains point b
-						tri_21b = FALSE;
-					}
-
-					if (!tri_1a2)
-					{
-						use_tri1a2 = FALSE;
-					}
-					else if (!tri_21b)
-					{
-						use_tri1a2 = TRUE;
-					}
-					else
-					{
-						LLVector3 d1 = p1 - pa;
-						LLVector3 d2 = p2 - pb;
-
-						if (d1.magVecSquared() < d2.magVecSquared())
-						{
-							use_tri1a2 = TRUE;
-						}
-						else
-						{
-							use_tri1a2 = FALSE;
-						}
-					}
-
-					if (use_tri1a2)
-					{
-						index[count++] = pt1;
-						index[count++] = pt2;
-						index[count++] = pt1 + 1;
-						pt1++;
-					}
-					else
-					{
-						index[count++] = pt1;
-						index[count++] = pt2;
-						index[count++] = pt2 - 1;
-						pt2--;
-					}
-				}
-			}
-		}
-		else
-		{
-			// Open solid
-
-			for (t = 0; t < size_t - 1; t++)
-			{
-				// Outer face + 1 cut face
-				for (s = 0; s < size_s - 1; s++)
-				{
-					i  = s + t*size_s;
-
-					index[count++]  = i;				// x,y
-					index[count++]  = i + 1;			// x+1,y
-					index[count++]  = i + size_s;		// x,y+1
-
-					index[count++]  = i + size_s;		// x,y+1
-					index[count++]  = i + 1;			// x+1,y
-					index[count++]  = i + size_s + 1;	// x+1,y+1
-				}
-
-				// The other cut face
-				index[count++] = (size_s - 1) + (t*size_s);		// x,y
-				index[count++] = 0 + t*size_s;					// x+1,y
-				index[count++] = (size_s - 1) + (t+1)*size_s;	// x,y+1
-
-				index[count++] = (size_s - 1) + (t+1)*size_s;	// x,y+1
-				index[count++] = 0 + (t*size_s);				// x+1,y
-				index[count++] = 0 + (t+1)*size_s;				// x+1,y+1
-			}
-
-			// Do the top and bottom caps, if necessary
-			if (path_open)
-			{
-				for (s = 0; s < size_s - 2; s++)
-				{
-					index[count++] = s+1;
-					index[count++] = s;
-					index[count++] = size_s - 1;
-				}
-
-				// We've got a top cap
-				S32 offset = (size_t - 1)*size_s;
-				for (s = 0; s < size_s - 2; s++)
-				{
-					// Inverted ordering from bottom cap.
-					index[count++] = offset + size_s - 1;
-					index[count++] = offset + s;
-					index[count++] = offset + s + 1;
-				}
-			}
-		}
-	}
-	else if (hollow)
-	{
-		// Closed hollow
-		// Outer face
-		
-		for (t = 0; t < size_t - 1; t++)
-		{
-			for (s = 0; s < size_s_out - 1; s++)
-			{
-				i  = s + t*size_s;
-
-				index[count++]  = i;				// x,y
-				index[count++]  = i + 1;			// x+1,y
-				index[count++]  = i + size_s;		// x,y+1
-
-				index[count++]  = i + size_s;		// x,y+1
-				index[count++]  = i + 1;			// x+1,y
-				index[count++]  = i + 1 + size_s;	// x+1,y+1
-			}
-		}
-
-		// Inner face
-		// Invert facing from outer face
-		for (t = 0; t < size_t - 1; t++)
-		{
-			for (s = size_s_out; s < size_s - 1; s++)
-			{
-				i  = s + t*size_s;
-
-				index[count++]  = i;				// x,y
-				index[count++]  = i + 1;			// x+1,y
-				index[count++]  = i + size_s;		// x,y+1
-
-				index[count++]  = i + size_s;		// x,y+1
-				index[count++]  = i + 1;			// x+1,y
-				index[count++]  = i + 1 + size_s;	// x+1,y+1
-			}
-		}
-
-		// Do the top and bottom caps, if necessary
-		if (path_open)
-		{
-			// Top cap
-			S32 pt1 = 0;
-			S32 pt2 = size_s-1;
-			S32 i   = (size_t - 1)*size_s;
-
-			while (pt2 - pt1 > 1)
-			{
-				// Use the profile points instead of the mesh, since you want
-				// the un-transformed profile distances.
-				LLVector3 p1 = getProfile().mProfile[pt1];
-				LLVector3 p2 = getProfile().mProfile[pt2];
-				LLVector3 pa = getProfile().mProfile[pt1+1];
-				LLVector3 pb = getProfile().mProfile[pt2-1];
-
-				p1.mV[VZ] = 0.f;
-				p2.mV[VZ] = 0.f;
-				pa.mV[VZ] = 0.f;
-				pb.mV[VZ] = 0.f;
-
-				// Use area of triangle to determine backfacing
-				F32 area_1a2, area_1ba, area_21b, area_2ab;
-				area_1a2 =  (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
-							(pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
-							(p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
-
-				area_1ba =  (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-							(pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
-							(pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
-
-				area_21b =  (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
-							(p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-							(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
-				area_2ab =  (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
-							(pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
-							(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
-				BOOL use_tri1a2 = TRUE;
-				BOOL tri_1a2 = TRUE;
-				BOOL tri_21b = TRUE;
-
-				if (area_1a2 < 0)
-				{
-					tri_1a2 = FALSE;
-				}
-				if (area_2ab < 0)
-				{
-					// Can't use, because it contains point b
-					tri_1a2 = FALSE;
-				}
-				if (area_21b < 0)
-				{
-					tri_21b = FALSE;
-				}
-				if (area_1ba < 0)
-				{
-					// Can't use, because it contains point b
-					tri_21b = FALSE;
-				}
-
-				if (!tri_1a2)
-				{
-					use_tri1a2 = FALSE;
-				}
-				else if (!tri_21b)
-				{
-					use_tri1a2 = TRUE;
-				}
-				else
-				{
-					LLVector3 d1 = p1 - pa;
-					LLVector3 d2 = p2 - pb;
-
-					if (d1.magVecSquared() < d2.magVecSquared())
-					{
-						use_tri1a2 = TRUE;
-					}
-					else
-					{
-						use_tri1a2 = FALSE;
-					}
-				}
-
-				if (use_tri1a2)
-				{
-					index[count++] = pt1 + i;
-					index[count++] = pt1 + 1 + i;
-					index[count++] = pt2 + i;
-					pt1++;
-				}
-				else
-				{
-					index[count++] = pt1 + i;
-					index[count++] = pt2 - 1 + i;
-					index[count++] = pt2 + i;
-					pt2--;
-				}
-			}
-
-			// Bottom cap
-			pt1          = 0;
-			pt2          = size_s-1;
-			while (pt2 - pt1 > 1)
-			{
-				// Use the profile points instead of the mesh, since you want
-				// the un-transformed profile distances.
-				LLVector3 p1 = getProfile().mProfile[pt1];
-				LLVector3 p2 = getProfile().mProfile[pt2];
-				LLVector3 pa = getProfile().mProfile[pt1+1];
-				LLVector3 pb = getProfile().mProfile[pt2-1];
-
-				p1.mV[VZ] = 0.f;
-				p2.mV[VZ] = 0.f;
-				pa.mV[VZ] = 0.f;
-				pb.mV[VZ] = 0.f;
-
-				// Use area of triangle to determine backfacing
-				F32 area_1a2, area_1ba, area_21b, area_2ab;
-				area_1a2 =  (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
-							(pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
-							(p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
-
-				area_1ba =  (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-							(pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
-							(pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
-
-				area_21b =  (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
-							(p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-							(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
-				area_2ab =  (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
-							(pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
-							(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
-				BOOL use_tri1a2 = TRUE;
-				BOOL tri_1a2 = TRUE;
-				BOOL tri_21b = TRUE;
-
-				if (area_1a2 < 0)
-				{
-					tri_1a2 = FALSE;
-				}
-				if (area_2ab < 0)
-				{
-					// Can't use, because it contains point b
-					tri_1a2 = FALSE;
-				}
-				if (area_21b < 0)
-				{
-					tri_21b = FALSE;
-				}
-				if (area_1ba < 0)
-				{
-					// Can't use, because it contains point b
-					tri_21b = FALSE;
-				}
-
-				if (!tri_1a2)
-				{
-					use_tri1a2 = FALSE;
-				}
-				else if (!tri_21b)
-				{
-					use_tri1a2 = TRUE;
-				}
-				else
-				{
-					LLVector3 d1 = p1 - pa;
-					LLVector3 d2 = p2 - pb;
-
-					if (d1.magVecSquared() < d2.magVecSquared())
-					{
-						use_tri1a2 = TRUE;
-					}
-					else
-					{
-						use_tri1a2 = FALSE;
-					}
-				}
-
-				if (use_tri1a2)
-				{
-					index[count++] = pt1;
-					index[count++] = pt2;
-					index[count++] = pt1 + 1;
-					pt1++;
-				}
-				else
-				{
-					index[count++] = pt1;
-					index[count++] = pt2;
-					index[count++] = pt2 - 1;
-					pt2--;
-				}
-			}
-		}		
-	}
-	else
-	{
-		// Closed solid.  Easy case.
-		for (t = 0; t < size_t - 1; t++)
-		{
-			for (s = 0; s < size_s - 1; s++)
-			{
-				// Should wrap properly, but for now...
-				i  = s + t*size_s;
-
-				index[count++]  = i;				// x,y
-				index[count++]  = i + 1;			// x+1,y
-				index[count++]  = i + size_s;		// x,y+1
-
-				index[count++]  = i + size_s;		// x,y+1
-				index[count++]  = i + 1;			// x+1,y
-				index[count++]  = i + size_s + 1;	// x+1,y+1
-			}
-		}
-
-		// Do the top and bottom caps, if necessary
-		if (path_open)
-		{
-			// bottom cap
-			for (s = 1; s < size_s - 2; s++)
-			{
-				index[count++] = s+1;
-				index[count++] = s;
-				index[count++] = 0;
-			}
-
-			// top cap
-			S32 offset = (size_t - 1)*size_s;
-			for (s = 1; s < size_s - 2; s++)
-			{
-				// Inverted ordering from bottom cap.
-				index[count++] = offset;
-				index[count++] = offset + s;
-				index[count++] = offset + s + 1;
-			}
-		}
-	}
-
-#ifdef LL_DEBUG
-	// assert that we computed the correct number of indices
-	if (count != expected_num_triangle_indices )
-	{
-		llerrs << "bad index count prediciton:"
-			<< "  expected=" << expected_num_triangle_indices 
-			<< " actual=" << count << llendl;
-	}
-#endif
-
-#if 0
-	// verify that each index does not point beyond the size of the mesh
-	S32 num_vertices = mMesh.size();
-	for (i = 0; i < count; i+=3)
-	{
-		llinfos << index[i] << ":" << index[i+1] << ":" << index[i+2] << llendl;
-		llassert(index[i] < num_vertices);
-		llassert(index[i+1] < num_vertices);
-		llassert(index[i+2] < num_vertices);
-	}
-#endif
-
-	num_indices = count;
-	return index;
-}
-
-void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts)
-{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the 
-	//supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost
-	F32 detail[] = {1.f, 1.5f, 2.5f, 4.f};	
-	for (S32 i = 0; i < 4; i++)
-	{
-		S32 count = 0;
-		S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]);
-		S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]);
-
-		count = (profile_points-1)*2*(path_points-1);
-		count += profile_points*2;
-
-		counts[i] = count;
-	}
-}
-
-S32 LLVolume::getNumTriangleIndices() const
-{
-	BOOL profile_open = getProfile().isOpen();
-	BOOL hollow = (mParams.getProfileParams().getHollow() > 0);
-	BOOL path_open = getPath().isOpen();
-
-	S32 size_s, size_s_out, size_t;
-	size_s = getProfile().getTotal();
-	size_s_out = getProfile().getTotalOut();
-	size_t = getPath().mPath.size();
-
-	S32 count = 0;
-	if (profile_open)		/* Flawfinder: ignore */
-	{
-		if (hollow)
-		{
-			// Open hollow -- much like the closed solid, except we 
-			// we need to stitch up the gap between s=0 and s=size_s-1
-			count = (size_t - 1) * (((size_s -1) * 6) + 6);
-		}
-		else
-		{
-			count = (size_t - 1) * (((size_s -1) * 6) + 6); 
-		}
-	}
-	else if (hollow)
-	{
-		// Closed hollow
-		// Outer face
-		count = (size_t - 1) * (size_s_out - 1) * 6;
-
-		// Inner face
-		count += (size_t - 1) * ((size_s - 1) - size_s_out) * 6;
-	}
-	else
-	{
-		// Closed solid.  Easy case.
-		count = (size_t - 1) * (size_s - 1) * 6;
-	}
-
-	if (path_open)
-	{
-		S32 cap_triangle_count = size_s - 3;
-		if ( profile_open
-			|| hollow )
-		{
-			cap_triangle_count = size_s - 2;
-		}
-		if ( cap_triangle_count > 0 )
-		{
-			// top and bottom caps
-			count += cap_triangle_count * 2 * 3;
-		}
-	}
-	return count;
-}
-
-
-S32 LLVolume::getNumTriangles(S32* vcount) const
-{
-	U32 triangle_count = 0;
-	U32 vertex_count = 0;
-
-	for (S32 i = 0; i < getNumVolumeFaces(); ++i)
-	{
-		const LLVolumeFace& face = getVolumeFace(i);
-		triangle_count += face.mNumIndices/3;
-
-		vertex_count += face.mNumVertices;
-	}
-
-
-	if (vcount)
-	{
-		*vcount = vertex_count;
-	}
-	
-	return triangle_count;
-}
-
-
-//-----------------------------------------------------------------------------
-// generateSilhouetteVertices()
-//-----------------------------------------------------------------------------
-void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
-										  std::vector<LLVector3> &normals,
-										  const LLVector3& obj_cam_vec_in,
-										  const LLMatrix4& mat_in,
-										  const LLMatrix3& norm_mat_in,
-										  S32 face_mask)
-{
-	LLMatrix4a mat;
-	mat.loadu(mat_in);
-
-	LLMatrix4a norm_mat;
-	norm_mat.loadu(norm_mat_in);
-		
-	LLVector4a obj_cam_vec;
-	obj_cam_vec.load3(obj_cam_vec_in.mV);
-
-	vertices.clear();
-	normals.clear();
-
-	if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH)
-	{
-		return;
-	}
-	
-	S32 cur_index = 0;
-	//for each face
-	for (face_list_t::iterator iter = mVolumeFaces.begin();
-		 iter != mVolumeFaces.end(); ++iter)
-	{
-		LLVolumeFace& face = *iter;
-	
-		if (!(face_mask & (0x1 << cur_index++)) ||
-		     face.mNumIndices == 0 || face.mEdge.empty())
-		{
-			continue;
-		}
-
-		if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) {
-	
-		}
-		else {
-
-			//==============================================
-			//DEBUG draw edge map instead of silhouette edge
-			//==============================================
-
-#if DEBUG_SILHOUETTE_EDGE_MAP
-
-			//for each triangle
-			U32 count = face.mNumIndices;
-			for (U32 j = 0; j < count/3; j++) {
-				//get vertices
-				S32 v1 = face.mIndices[j*3+0];
-				S32 v2 = face.mIndices[j*3+1];
-				S32 v3 = face.mIndices[j*3+2];
-
-				//get current face center
-				LLVector3 cCenter = (face.mVertices[v1].getPosition() + 
-									face.mVertices[v2].getPosition() + 
-									face.mVertices[v3].getPosition()) / 3.0f;
-
-				//for each edge
-				for (S32 k = 0; k < 3; k++) {
-                    S32 nIndex = face.mEdge[j*3+k];
-					if (nIndex <= -1) {
-						continue;
-					}
-
-					if (nIndex >= (S32) count/3) {
-						continue;
-					}
-					//get neighbor vertices
-					v1 = face.mIndices[nIndex*3+0];
-					v2 = face.mIndices[nIndex*3+1];
-					v3 = face.mIndices[nIndex*3+2];
+					//get neighbor vertices
+					v1 = face.mIndices[nIndex*3+0];
+					v2 = face.mIndices[nIndex*3+1];
+					v3 = face.mIndices[nIndex*3+2];
 
 					//get neighbor face center
 					LLVector3 nCenter = (face.mVertices[v1].getPosition() + 
@@ -5243,8 +4598,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 
 	freeData();
 	
-	LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a));
-
 	resizeVertices(src.mNumVertices);
 	resizeIndices(src.mNumIndices);
 
@@ -5307,7 +4660,7 @@ LLVolumeFace::~LLVolumeFace()
 
 void LLVolumeFace::freeData()
 {
-	ll_aligned_free_16(mPositions);
+	ll_aligned_free(mPositions);
 	mPositions = NULL;
 
 	//normals and texture coordinates are part of the same buffer as mPositions, do not free them separately
@@ -5331,52 +4684,23 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)
 	delete mOctree;
 	mOctree = NULL;
 
+	LL_CHECK_MEMORY
 	BOOL ret = FALSE ;
 	if (mTypeMask & CAP_MASK)
 	{
 		ret = createCap(volume, partial_build);
+		LL_CHECK_MEMORY
 	}
 	else if ((mTypeMask & END_MASK) || (mTypeMask & SIDE_MASK))
 	{
 		ret = createSide(volume, partial_build);
+		LL_CHECK_MEMORY
 	}
 	else
 	{
 		llerrs << "Unknown/uninitialized face type!" << llendl;
 	}
 
-	//update the range of the texture coordinates
-	if(ret)
-	{
-		mTexCoordExtents[0].setVec(1.f, 1.f) ;
-		mTexCoordExtents[1].setVec(0.f, 0.f) ;
-
-		for(U32 i = 0 ; i < mNumVertices ; i++)
-		{
-			if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0])
-			{
-				mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ;
-			}
-			if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0])
-			{
-				mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ;
-			}
-
-			if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1])
-			{
-				mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ;
-			}
-			if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1])
-			{
-				mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ;
-			}			
-		}
-		mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ;
-		mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ;
-		mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ;
-		mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ;
-	}
-
 	return ret ;
 }
 
@@ -6068,8 +5392,10 @@ void	LerpPlanarVertex(LLVolumeFace::VertexData& v0,
 
 BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 {
-	const std::vector<LLVolume::Point>& mesh = volume->getMesh();
-	const std::vector<LLVector3>& profile = volume->getProfile().mProfile;
+	LL_CHECK_MEMORY		
+
+	const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh();
+	const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile;
 	S32 max_s = volume->getProfile().getTotal();
 	S32 max_t = volume->getPath().mPath.size();
 
@@ -6099,9 +5425,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 		VertexData baseVert;
 		for(S32 t = 0; t < 4; t++)
 		{
-			corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV);
-			corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f;
-			corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1];
+			corners[t].getPosition().load4a(mesh[offset + (grid_size*t)].getF32ptr());
+			corners[t].mTexCoord.mV[0] = profile[grid_size*t][0]+0.5f;
+			corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t][1];
 		}
 
 		{
@@ -6182,6 +5508,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 		mCenter->mul(0.5f); 
 	}
 
+	llassert(less_than_max_mag(mExtents[0]));
+	llassert(less_than_max_mag(mExtents[1]));
+
 	if (!partial_build)
 	{
 		resizeIndices(grid_size*grid_size*6);
@@ -6212,6 +5541,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 		}
 	}
 		
+	LL_CHECK_MEMORY
 	return TRUE;
 }
 
@@ -6230,8 +5560,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 
 	S32 num_vertices = 0, num_indices = 0;
 
-	const std::vector<LLVolume::Point>& mesh = volume->getMesh();
-	const std::vector<LLVector3>& profile = volume->getProfile().mProfile;
+	const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh();
+	const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile;
 
 	// All types of caps have the same number of vertices and indices
 	num_vertices = profile.size();
@@ -6251,13 +5581,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	{
 		resizeVertices(num_vertices);
 		allocateBinormals(num_vertices);
-
 		if (!partial_build)
 		{
 			resizeIndices(num_indices);
 		}
 	}
 
+	LL_CHECK_MEMORY;
+
 	S32 max_s = volume->getProfile().getTotal();
 	S32 max_t = volume->getPath().mPath.size();
 
@@ -6288,35 +5619,68 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	LLVector4a* binorm = (LLVector4a*) mBinormals;
 
 	// Copy the vertices into the array
-	for (S32 i = 0; i < num_vertices; i++)
+
+	const LLVector4a* src = mesh.mArray+offset;
+	const LLVector4a* end = src+num_vertices;
+	
+	min = *src;
+	max = min;
+	
+	
+	const LLVector4a* p = profile.mArray;
+
+	if (mTypeMask & TOP_MASK)
 	{
-		if (mTypeMask & TOP_MASK)
-		{
-			tc[i].mV[0] = profile[i].mV[0]+0.5f;
-			tc[i].mV[1] = profile[i].mV[1]+0.5f;
-		}
-		else
+		min_uv.set((*p)[0]+0.5f,
+					(*p)[1]+0.5f);
+
+		max_uv = min_uv;
+
+		while(src < end)
 		{
-			// Mirror for underside.
-			tc[i].mV[0] = profile[i].mV[0]+0.5f;
-			tc[i].mV[1] = 0.5f - profile[i].mV[1];
-		}
+			tc->mV[0] = (*p)[0]+0.5f;
+			tc->mV[1] = (*p)[1]+0.5f;
 
-		pos[i].load3(mesh[i + offset].mPos.mV);
+			llassert(less_than_max_mag(*src));
+			update_min_max(min,max,*src);
+			update_min_max(min_uv, max_uv, *tc);
 		
-		if (i == 0)
-		{
-			max = pos[i];
-			min = max;
-			min_uv = max_uv = tc[i];
+			*pos = *src;
+		
+			++p;
+			++tc;
+			++src;
+			++pos;
 		}
-		else
+	}
+	else
+	{
+
+		min_uv.set((*p)[0]+0.5f,
+				   0.5f - (*p)[1]);
+		max_uv = min_uv;
+
+		while(src < end)
 		{
-			update_min_max(min,max,pos[i]);
-			update_min_max(min_uv, max_uv, tc[i]);
+			// Mirror for underside.
+			tc->mV[0] = (*p)[0]+0.5f;
+			tc->mV[1] = 0.5f - (*p)[1];
+		
+			llassert(less_than_max_mag(*src));
+			update_min_max(min,max,*src);
+			update_min_max(min_uv, max_uv, *tc);
+		
+			*pos = *src;
+		
+			++p;
+			++tc;
+			++src;
+			++pos;
 		}
 	}
 
+	LL_CHECK_MEMORY
+
 	mCenter->setAdd(min, max);
 	mCenter->mul(0.5f); 
 
@@ -6353,15 +5717,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	
 	if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK))
 	{
-		pos[num_vertices] = *mCenter;
-		tc[num_vertices] = cuv;
+		*pos++ = *mCenter;
+		*tc++ = cuv;
 		num_vertices++;
 	}
-		
-	for (S32 i = 0; i < num_vertices; i++)
+	
+	LL_CHECK_MEMORY
+
+	F32* dst_binorm = (F32*) binorm;
+	F32* end_binorm = (F32*) (binorm+num_vertices);
+
+	F32* dst_norm = (F32*) norm;
+	
+	while (dst_binorm < end_binorm)
 	{
-		binorm[i].load4a(binormal.getF32ptr());
-		norm[i].load4a(normal.getF32ptr());
+		binormal.store4a(dst_binorm);
+		normal.store4a(dst_norm);
+
+		dst_binorm += 4;
+		dst_norm += 4;
 	}
 
 	if (partial_build)
@@ -6382,33 +5756,38 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 			{
 				// Use the profile points instead of the mesh, since you want
 				// the un-transformed profile distances.
-				LLVector3 p1 = profile[pt1];
-				LLVector3 p2 = profile[pt2];
-				LLVector3 pa = profile[pt1+1];
-				LLVector3 pb = profile[pt2-1];
+				const LLVector4a& p1 = profile[pt1];
+				const LLVector4a& p2 = profile[pt2];
+				const LLVector4a& pa = profile[pt1+1];
+				const LLVector4a& pb = profile[pt2-1];
+
+				const F32* p1V = p1.getF32ptr();
+				const F32* p2V = p2.getF32ptr();
+				const F32* paV = pa.getF32ptr();
+				const F32* pbV = pb.getF32ptr();
 
-				p1.mV[VZ] = 0.f;
-				p2.mV[VZ] = 0.f;
-				pa.mV[VZ] = 0.f;
-				pb.mV[VZ] = 0.f;
+				//p1.mV[VZ] = 0.f;
+				//p2.mV[VZ] = 0.f;
+				//pa.mV[VZ] = 0.f;
+				//pb.mV[VZ] = 0.f;
 
 				// Use area of triangle to determine backfacing
 				F32 area_1a2, area_1ba, area_21b, area_2ab;
-				area_1a2 =  (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
-							(pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
-							(p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
+				area_1a2 =  (p1V[0]*paV[1] - paV[0]*p1V[1]) +
+							(paV[0]*p2V[1] - p2V[0]*paV[1]) +
+							(p2V[0]*p1V[1] - p1V[0]*p2V[1]);
 
-				area_1ba =  (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-							(pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
-							(pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
+				area_1ba =  (p1V[0]*pbV[1] - pbV[0]*p1V[1]) +
+							(pbV[0]*paV[1] - paV[0]*pbV[1]) +
+							(paV[0]*p1V[1] - p1V[0]*paV[1]);
 
-				area_21b =  (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
-							(p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-							(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
+				area_21b =  (p2V[0]*p1V[1] - p1V[0]*p2V[1]) +
+							(p1V[0]*pbV[1] - pbV[0]*p1V[1]) +
+							(pbV[0]*p2V[1] - p2V[0]*pbV[1]);
 
-				area_2ab =  (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
-							(pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
-							(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
+				area_2ab =  (p2V[0]*paV[1] - paV[0]*p2V[1]) +
+							(paV[0]*pbV[1] - pbV[0]*paV[1]) +
+							(pbV[0]*p2V[1] - p2V[0]*pbV[1]);
 
 				BOOL use_tri1a2 = TRUE;
 				BOOL tri_1a2 = TRUE;
@@ -6443,10 +5822,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 				}
 				else
 				{
-					LLVector3 d1 = p1 - pa;
-					LLVector3 d2 = p2 - pb;
+					LLVector4a d1;
+					d1.setSub(p1, pa);
+					
+					LLVector4a d2; 
+					d2.setSub(p2, pb);
 
-					if (d1.magVecSquared() < d2.magVecSquared())
+					if (d1.dot3(d1) < d2.dot3(d2))
 					{
 						use_tri1a2 = TRUE;
 					}
@@ -6485,33 +5867,33 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 			{
 				// Use the profile points instead of the mesh, since you want
 				// the un-transformed profile distances.
-				LLVector3 p1 = profile[pt1];
-				LLVector3 p2 = profile[pt2];
-				LLVector3 pa = profile[pt1+1];
-				LLVector3 pb = profile[pt2-1];
-
-				p1.mV[VZ] = 0.f;
-				p2.mV[VZ] = 0.f;
-				pa.mV[VZ] = 0.f;
-				pb.mV[VZ] = 0.f;
-
+				const LLVector4a& p1 = profile[pt1];
+				const LLVector4a& p2 = profile[pt2];
+				const LLVector4a& pa = profile[pt1+1];
+				const LLVector4a& pb = profile[pt2-1];
+
+				const F32* p1V = p1.getF32ptr();
+				const F32* p2V = p2.getF32ptr();
+				const F32* paV = pa.getF32ptr();
+				const F32* pbV = pb.getF32ptr();
+				
 				// Use area of triangle to determine backfacing
 				F32 area_1a2, area_1ba, area_21b, area_2ab;
-				area_1a2 =  (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
-							(pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
-							(p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
+				area_1a2 =  (p1V[0]*paV[1] - paV[0]*p1V[1]) +
+							(paV[0]*p2V[1] - p2V[0]*paV[1]) +
+							(p2V[0]*p1V[1] - p1V[0]*p2V[1]);
 
-				area_1ba =  (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-							(pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
-							(pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
+				area_1ba =  (p1V[0]*pbV[1] - pbV[0]*p1V[1]) +
+							(pbV[0]*paV[1] - paV[0]*pbV[1]) +
+							(paV[0]*p1V[1] - p1V[0]*paV[1]);
 
-				area_21b =  (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
-							(p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
-							(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
+				area_21b =  (p2V[0]*p1V[1] - p1V[0]*p2V[1]) +
+							(p1V[0]*pbV[1] - pbV[0]*p1V[1]) +
+							(pbV[0]*p2V[1] - p2V[0]*pbV[1]);
 
-				area_2ab =  (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
-							(pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
-							(pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
+				area_2ab =  (p2V[0]*paV[1] - paV[0]*p2V[1]) +
+							(paV[0]*pbV[1] - pbV[0]*paV[1]) +
+							(pbV[0]*p2V[1] - p2V[0]*pbV[1]);
 
 				BOOL use_tri1a2 = TRUE;
 				BOOL tri_1a2 = TRUE;
@@ -6546,10 +5928,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 				}
 				else
 				{
-					LLVector3 d1 = p1 - pa;
-					LLVector3 d2 = p2 - pb;
+					LLVector4a d1;
+					d1.setSub(p1,pa);
+					LLVector4a d2;
+					d2.setSub(p2,pb);
 
-					if (d1.magVecSquared() < d2.magVecSquared())
+					if (d1.dot3(d1) < d2.dot3(d2))
 					{
 						use_tri1a2 = TRUE;
 					}
@@ -6598,6 +5982,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 
 
 	}
+
+	LL_CHECK_MEMORY
 		
 	return TRUE;
 }
@@ -6900,6 +6286,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 
 BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 {
+	LL_CHECK_MEMORY
 	BOOL flat = mTypeMask & FLAT_MASK;
 
 	U8 sculpt_type = volume->getParams().getSculptType();
@@ -6910,9 +6297,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 	
 	S32 num_vertices, num_indices;
 
-	const std::vector<LLVolume::Point>& mesh = volume->getMesh();
-	const std::vector<LLVector3>& profile = volume->getProfile().mProfile;
-	const std::vector<LLPath::PathPt>& path_data = volume->getPath().mPath;
+	const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh();
+	const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile;
+	const LLAlignedArray<LLPath::PathPt,64>& path_data = volume->getPath().mPath;
 
 	S32 max_s = volume->getProfile().getTotal();
 
@@ -6933,10 +6320,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 		}
 	}
 
+	LL_CHECK_MEMORY
+
 	LLVector4a* pos = (LLVector4a*) mPositions;
-	LLVector4a* norm = (LLVector4a*) mNormals;
 	LLVector2* tc = (LLVector2*) mTexCoords;
-	F32 begin_stex = floorf(profile[mBeginS].mV[2]);
+	F32 begin_stex = floorf(profile[mBeginS][2]);
 	S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS;
 
 	S32 cur_vertex = 0;
@@ -6965,11 +6353,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 				// Get s value for tex-coord.
 				if (!flat)
 				{
-					ss = profile[mBeginS + s].mV[2];
+					ss = profile[mBeginS + s][2];
 				}
 				else
 				{
-					ss = profile[mBeginS + s].mV[2] - begin_stex;
+					ss = profile[mBeginS + s][2] - begin_stex;
 				}
 			}
 
@@ -6989,19 +6377,17 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 				i = mBeginS + s + max_s*t;
 			}
 
-			pos[cur_vertex].load3(mesh[i].mPos.mV);
-			tc[cur_vertex] = LLVector2(ss,tt);
+			llassert(less_than_max_mag(mesh[i]));
+			mesh[i].store4a((F32*)(pos+cur_vertex));
+			tc[cur_vertex].set(ss,tt);
 		
-			norm[cur_vertex].clear();
 			cur_vertex++;
 
 			if (test && s > 0)
 			{
-				pos[cur_vertex].load3(mesh[i].mPos.mV);
-				tc[cur_vertex] = LLVector2(ss,tt);
-			
-				norm[cur_vertex].clear();
-				
+				llassert(less_than_max_mag(mesh[i]));
+				mesh[i].store4a((F32*)(pos+cur_vertex));
+				tc[cur_vertex].set(ss,tt);
 				cur_vertex++;
 			}
 		}
@@ -7018,28 +6404,66 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 			}
 
 			i = mBeginS + s + max_s*t;
-			ss = profile[mBeginS + s].mV[2] - begin_stex;
-			pos[cur_vertex].load3(mesh[i].mPos.mV);
-			tc[cur_vertex] = LLVector2(ss,tt);
-			norm[cur_vertex].clear(); 
-			
+			ss = profile[mBeginS + s][2] - begin_stex;
+
+			llassert(less_than_max_mag(mesh[i]));
+			mesh[i].store4a((F32*)(pos+cur_vertex));
+			tc[cur_vertex].set(ss,tt);
+						
 			cur_vertex++;
 		}
 	}
 	
+	LL_CHECK_MEMORY
 
-	//get bounding box for this side
-	LLVector4a& face_min = mExtents[0];
-	LLVector4a& face_max = mExtents[1];
+	
 	mCenter->clear();
 
-	face_min = face_max = pos[0];
+	LLVector4a* cur_pos = pos;
+	LLVector4a* end_pos = pos + mNumVertices;
+
+	//get bounding box for this side
+	LLVector4a face_min;
+	LLVector4a face_max;
+	
+	face_min = face_max = *cur_pos++;
+		
+	while (cur_pos < end_pos)
+	{
+		update_min_max(face_min, face_max, *cur_pos++);
+	}
+
+	mExtents[0] = face_min;
+	mExtents[1] = face_max;
+
+	U32 tc_count = mNumVertices;
+	if (tc_count%2 == 1)
+	{ //odd number of texture coordinates, duplicate last entry to padded end of array
+		tc_count++;
+		mTexCoords[mNumVertices] = mTexCoords[mNumVertices-1];
+	}
+
+	LLVector4a* cur_tc = (LLVector4a*) mTexCoords;
+	LLVector4a* end_tc = (LLVector4a*) (mTexCoords+tc_count);
+
+	LLVector4a tc_min; 
+	LLVector4a tc_max; 
+
+	tc_min = tc_max = *cur_tc++;
 
-	for (U32 i = 1; i < mNumVertices; ++i)
+	while (cur_tc < end_tc)
 	{
-		update_min_max(face_min, face_max, pos[i]);
+		update_min_max(tc_min, tc_max, *cur_tc++);
 	}
 
+	F32* minp = tc_min.getF32ptr();
+	F32* maxp = tc_max.getF32ptr();
+
+	mTexCoordExtents[0].mV[0] = llmin(minp[0], minp[2]);
+	mTexCoordExtents[0].mV[1] = llmin(minp[1], minp[3]);
+	mTexCoordExtents[1].mV[0] = llmax(maxp[0], maxp[2]);
+	mTexCoordExtents[1].mV[1] = llmax(maxp[1], maxp[3]);
+
 	mCenter->setAdd(face_min, face_max);
 	mCenter->mul(0.5f);
 
@@ -7104,33 +6528,94 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 		}
 	}
 
+	LL_CHECK_MEMORY
+
 	//clear normals
-	for (U32 i = 0; i < mNumVertices; i++)
+	F32* dst = (F32*) mNormals;
+	F32* end = (F32*) (mNormals+mNumVertices);
+	LLVector4a zero = LLVector4a::getZero();
+
+	while (dst < end)
 	{
-		mNormals[i].clear();
+		zero.store4a(dst);
+		dst += 4;
 	}
 
+	LL_CHECK_MEMORY
+
 	//generate normals 
 	U32 count = mNumIndices/3;
 
-	for (U32 i = 0; i < count; i++) //for each triangle
+	LLVector4a* norm = mNormals;
+
+	static LLAlignedArray<LLVector4a, 64> triangle_normals;
+	triangle_normals.resize(count);
+	LLVector4a* output = triangle_normals.mArray;
+	LLVector4a* end_output = output+count;
+
+	U16* idx = mIndices;
+
+	while (output < end_output)
 	{
-		const U16* idx = &(mIndices[i*3]);
-		
-		LLVector4a& v0 = *(pos+idx[0]);
-		LLVector4a& v1 = *(pos+idx[1]);
-		LLVector4a& v2 = *(pos+idx[2]);
-		
-		LLVector4a& n0 = *(norm+idx[0]);
-		LLVector4a& n1 = *(norm+idx[1]);
-		LLVector4a& n2 = *(norm+idx[2]);
+		LLVector4a b,v1,v2;
+		b.load4a((F32*) (pos+idx[0]));
+		v1.load4a((F32*) (pos+idx[1]));
+		v2.load4a((F32*) (pos+idx[2]));
 		
 		//calculate triangle normal
-		LLVector4a a, b, c;
+		LLVector4a a;
 		
-		a.setSub(v0, v1);
-		b.setSub(v0, v2);
-		c.setCross3(a,b);
+		a.setSub(b, v1);
+		b.sub(v2);
+
+
+		LLQuad& vector1 = *((LLQuad*) &v1);
+		LLQuad& vector2 = *((LLQuad*) &v2);
+
+		LLQuad& amQ = *((LLQuad*) &a);
+		LLQuad& bmQ = *((LLQuad*) &b);
+
+		// Hand-inlined body of LLVector4a::setCross3(a, b): the cross product a x b
+		// ends up in v1 (through the vector1 alias); v2, a and b are overwritten as scratch.
+		// Vectors are stored in memory in w, z, y, x order from high to low
+		// Set vector1 = { a[W], a[X], a[Z], a[Y] }
+		vector1 = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 0, 2, 1 ));
+		// Set vector2 = { b[W], b[Y], b[X], b[Z] }
+		vector2 = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 1, 0, 2 ));
+		// vector2 = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] }
+		vector2 = _mm_mul_ps( vector1, vector2 );
+		// amQ (reshuffled in place) = { a[W], a[Y], a[X], a[Z] }
+		amQ = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 1, 0, 2 ));
+		// bmQ (reshuffled in place) = { b[W], b[X], b[Z], b[Y] }
+		bmQ = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 0, 2, 1 ));
+		// vector1 (i.e. v1) = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] }
+		vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ ));
+
+		v1.store4a((F32*) output);
+
+		output++;
+		idx += 3;
+	}
+
+	idx = mIndices;
+
+	LLVector4a* src = triangle_normals.mArray;
+	
+	for (U32 i = 0; i < count; i++) //for each triangle
+	{
+		LLVector4a c;
+		c.load4a((F32*) (src++));
+
+		LLVector4a* n0p = norm+idx[0];
+		LLVector4a* n1p = norm+idx[1];
+		LLVector4a* n2p = norm+idx[2];
+
+		idx += 3;
+
+		LLVector4a n0,n1,n2;
+		n0.load4a((F32*) n0p);
+		n1.load4a((F32*) n1p);
+		n2.load4a((F32*) n2p);
 
 		n0.add(c);
 		n1.add(c);
@@ -7143,8 +6628,14 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 			case 1: n1.add(c); break;
 			case 2: n2.add(c); break;
 		};
+
+		n0.store4a((F32*) n0p);
+		n1.store4a((F32*) n1p);
+		n2.store4a((F32*) n2p);
 	}
 	
+	LL_CHECK_MEMORY
+
 	// adjust normals based on wrapping and stitching
 	
 	LLVector4a top;
@@ -7276,6 +6767,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 
 	}
 
+	LL_CHECK_MEMORY
+
 	return TRUE;
 }
 
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 1d3b0fe52f..5e43af92ec 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -37,7 +37,6 @@ class LLPath;
 
 template <class T> class LLOctreeNode;
 
-class LLVector4a;
 class LLVolumeFace;
 class LLVolume;
 class LLVolumeTriangle;
@@ -50,6 +49,8 @@ class LLVolumeTriangle;
 #include "v3math.h"
 #include "v3dmath.h"
 #include "v4math.h"
+#include "llvector4a.h"
+#include "llmatrix4a.h"
 #include "llquaternion.h"
 #include "llstrider.h"
 #include "v4coloru.h"
@@ -194,6 +195,26 @@ const U8 LL_SCULPT_FLAG_MIRROR    = 128;
 
 const S32 LL_SCULPT_MESH_MAX_FACES = 8;
 
+template <class T, U32 alignment> // alignment: presumably the byte alignment requested for mArray -- TODO confirm in the implementation
+class LLAlignedArray // array of T with explicit element count and capacity; only declared here, member definitions are not part of this hunk
+{
+public:
+	T* mArray; // raw element storage; this patch reads it directly (triangle_normals.mArray in llvolume.cpp)
+	U32 mElementCount; // the value size() returns
+	U32 mCapacity; // presumably the number of elements mArray has room for -- TODO confirm
+
+	LLAlignedArray();
+	~LLAlignedArray();
+
+	void push_back(const T& elem);
+	U32 size() const { return mElementCount; }
+	void resize(U32 size);
+	T* append(S32 N); // NOTE(review): semantics not visible here; presumably adds N elements and returns a pointer to the first -- confirm
+	T& operator[](int idx); // NOTE(review): index is a signed int while size() is U32
+	const T& operator[](int idx) const;
+};
+
+
 class LLProfileParams
 {
 public:
@@ -708,16 +729,16 @@ public:
 		LLFaceID  mFaceID;
 	};
 	
-	std::vector<LLVector3> mProfile;	
-	std::vector<LLVector2> mNormals;
+	LLAlignedArray<LLVector4a, 64> mProfile;	
+	//LLAlignedArray<LLVector4a, 64> mNormals;
 	std::vector<Face>      mFaces;
-	std::vector<LLVector3> mEdgeNormals;
-	std::vector<LLVector3> mEdgeCenters;
+
+	//LLAlignedArray<LLVector4a, 64> mEdgeNormals;
+	//LLAlignedArray<LLVector4a, 64> mEdgeCenters;
 
 	friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile);
 
 protected:
-	void genNormals(const LLProfileParams& params);
 	static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0);
 	void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0);
 
@@ -741,13 +762,29 @@ protected:
 class LLPath
 {
 public:
-	struct PathPt
+	class PathPt
 	{
-		LLVector3	 mPos;
-		LLVector2    mScale;
-		LLQuaternion mRot;
+	public:
+		LLMatrix4a   mRot; // rotation held as matrix rows; the constructor below sets the three identity rows
+		LLVector4a	 mPos;
+		
+		LLVector4a   mScale; // NOTE(review): the flexi update in this patch fills only x/y (z = 0, w = 1)
 		F32			 mTexT;
-		PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); }
+		F32 pad[3]; //for alignment (mTexT + pad[3] = four F32s, presumably one 16-byte slot)
+		PathPt() 
+		{ 
+			mPos.clear(); 
+			mTexT = 0; 
+			mScale.clear(); 
+			mRot.setRows(LLVector4a(1,0,0,0),
+						LLVector4a(0,1,0,0),
+						LLVector4a(0,0,1,0));
+
+			//distinctive sentinel values in the pad so it is recognizable while debugging
+			pad[0] = 3.14159f;
+			pad[1] = -3.14159f;
+			pad[2] = 0.585f;
+		}
 	};
 
 public:
@@ -779,7 +816,7 @@ public:
 	friend std::ostream& operator<<(std::ostream &s, const LLPath &path);
 
 public:
-	std::vector<PathPt> mPath;
+	LLAlignedArray<PathPt, 64> mPath;
 
 protected:
 	BOOL		  mOpen;
@@ -951,11 +988,7 @@ protected:
 	~LLVolume(); // use unref
 
 public:
-	struct Point
-	{
-		LLVector3 mPos;
-	};
-
+		
 	struct FaceParams
 	{
 		LLFaceID mFaceID;
@@ -978,8 +1011,8 @@ public:
 	const LLProfile& getProfile() const						{ return *mProfilep; }
 	LLPath& getPath() const									{ return *mPathp; }
 	void resizePath(S32 length);
-	const std::vector<Point>& getMesh() const				{ return mMesh; }
-	const LLVector3& getMeshPt(const U32 i) const			{ return mMesh[i].mPos; }
+	const LLAlignedArray<LLVector4a,64>&	getMesh() const				{ return mMesh; }
+	const LLVector4a& getMeshPt(const U32 i) const			{ return mMesh[i]; }
 
 	void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); }
 
@@ -994,10 +1027,7 @@ public:
 	S32 getSculptLevel() const                              { return mSculptLevel; }
 	void setSculptLevel(S32 level)							{ mSculptLevel = level; }
 
-	S32 *getTriangleIndices(U32 &num_indices) const;
-
-	// returns number of triangle indeces required for path/profile mesh
-	S32 getNumTriangleIndices() const;
+	
 	static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts);
 
 	S32 getNumTriangles(S32* vcount = NULL) const;
@@ -1070,7 +1100,8 @@ public:
 	LLVolumeParams mParams;
 	LLPath *mPathp;
 	LLProfile *mProfilep;
-	std::vector<Point> mMesh;
+	LLAlignedArray<LLVector4a,64> mMesh;
+	
 	
 	BOOL mGenerateSingleFace;
 	typedef std::vector<LLVolumeFace> face_list_t;
diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp
index 77a0cdffce..cd4718381b 100644
--- a/indra/newview/llflexibleobject.cpp
+++ b/indra/newview/llflexibleobject.cpp
@@ -683,30 +683,36 @@ void LLVolumeImplFlexible::doFlexibleUpdate()
 								LLVector4(z_axis, 0.f),
 								LLVector4(delta_pos, 1.f));
 			
+	LL_CHECK_MEMORY
 	for (i=0; i<=num_render_sections; ++i)
 	{
 		new_point = &path->mPath[i];
 		LLVector3 pos = newSection[i].mPosition * rel_xform;
 		LLQuaternion rot = mSection[i].mAxisRotation * newSection[i].mRotation * delta_rot;
-		
-		if (!mUpdated || (new_point->mPos-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f)
+	
+		LLVector3 np(new_point->mPos.getF32ptr());
+
+		if (!mUpdated || (np-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f)
 		{
-			new_point->mPos = newSection[i].mPosition * rel_xform;
+			new_point->mPos.load3((newSection[i].mPosition * rel_xform).mV);
 			mUpdated = FALSE;
 		}
 
-		new_point->mRot = rot;
-		new_point->mScale = newSection[i].mScale;
+		new_point->mRot.loadu(LLMatrix3(rot));
+		new_point->mScale.set(newSection[i].mScale.mV[0], newSection[i].mScale.mV[1], 0,1);
 		new_point->mTexT = ((F32)i)/(num_render_sections);
 	}
-
+	LL_CHECK_MEMORY
 	mLastSegmentRotation = parentSegmentRotation;
 }
 
+static LLFastTimer::DeclareTimer FTM_FLEXI_PREBUILD("Flexi Prebuild");
+
 void LLVolumeImplFlexible::preRebuild()
 {
 	if (!mUpdated)
 	{
+		LLFastTimer t(FTM_FLEXI_PREBUILD); // presumably a scope-bound timer: declared inside the branch so it covers only doFlexibleRebuild()
 		doFlexibleRebuild();
 	}
 }
diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h
index b1706d9d35..b5543c4a37 100644
--- a/indra/newview/llspatialpartition.h
+++ b/indra/newview/llspatialpartition.h
@@ -739,7 +739,7 @@ class LLVolumeGeometryManager: public LLGeometryManager
 	virtual void rebuildGeom(LLSpatialGroup* group);
 	virtual void rebuildMesh(LLSpatialGroup* group);
 	virtual void getGeometry(LLSpatialGroup* group);
-	void genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector<LLFace*>& faces, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE);
+	void genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE);
 	void registerFace(LLSpatialGroup* group, LLFace* facep, U32 type);
 };
 
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 7adf18b6d0..597fb03526 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -1051,8 +1051,7 @@ BOOL LLVOVolume::setVolume(const LLVolumeParams &params_in, const S32 detail, bo
 				}
 			}
 		}
-
-
+		
 		static LLCachedControl<bool> use_transform_feedback(gSavedSettings, "RenderUseTransformFeedback");
 
 		bool cache_in_vram = use_transform_feedback && gTransformPositionProgram.mProgramObject &&
@@ -4242,11 +4241,20 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
 
 	mFaceList.clear();
 
-	std::vector<LLFace*> fullbright_faces;
-	std::vector<LLFace*> bump_faces;
-	std::vector<LLFace*> simple_faces;
+	const U32 MAX_FACE_COUNT = 4096;
+	
+	static LLFace** fullbright_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64);
+	static LLFace** bump_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64);
+	static LLFace** simple_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64);
+	static LLFace** alpha_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64);
+	
+	U32 fullbright_count = 0;
+	U32 bump_count = 0;
+	U32 simple_count = 0;
+	U32 alpha_count = 0;
+
 
-	std::vector<LLFace*> alpha_faces;
+	
 	U32 useage = group->mSpatialPartition->mBufferUsage;
 
 	U32 max_vertices = (gSavedSettings.getS32("RenderMaxVBOSize")*1024)/LLVertexBuffer::calcVertexSize(group->mSpatialPartition->mVertexDataMask);
@@ -4257,6 +4265,8 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
 
 	bool emissive = false;
 
+	
+
 	{
 		LLFastTimer t(FTM_REBUILD_VOLUME_FACE_LIST);
 
@@ -4558,7 +4568,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
 					{
 						if (facep->canRenderAsMask())
 						{ //can be treated as alpha mask
-							simple_faces.push_back(facep);
+							if (simple_count < MAX_FACE_COUNT)
+							{
+								simple_faces[simple_count++] = facep;
+							}
 						}
 						else
 						{
@@ -4566,7 +4579,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
 							{ //only treat as alpha in the pipeline if < 100% transparent
 								drawablep->setState(LLDrawable::HAS_ALPHA);
 							}
-							alpha_faces.push_back(facep);
+							if (alpha_count < MAX_FACE_COUNT)
+							{
+								alpha_faces[alpha_count++] = facep;
+							}
 						}
 					}
 					else
@@ -4581,33 +4597,51 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
 						{
 							if (te->getBumpmap())
 							{ //needs normal + binormal
-								bump_faces.push_back(facep);
+								if (bump_count < MAX_FACE_COUNT)
+								{
+									bump_faces[bump_count++] = facep;
+								}
 							}
 							else if (te->getShiny() || !te->getFullbright())
 							{ //needs normal
-								simple_faces.push_back(facep);
+								if (simple_count < MAX_FACE_COUNT)
+								{
+									simple_faces[simple_count++] = facep;
+								}
 							}
 							else 
 							{ //doesn't need normal
 								facep->setState(LLFace::FULLBRIGHT);
-								fullbright_faces.push_back(facep);
+								if (fullbright_count < MAX_FACE_COUNT)
+								{
+									fullbright_faces[fullbright_count++] = facep;
+								}
 							}
 						}
 						else
 						{
 							if (te->getBumpmap() && LLPipeline::sRenderBump)
 							{ //needs normal + binormal
-								bump_faces.push_back(facep);
+								if (bump_count < MAX_FACE_COUNT)
+								{
+									bump_faces[bump_count++] = facep;
+								}
 							}
 							else if ((te->getShiny() && LLPipeline::sRenderBump) ||
 								!(te->getFullbright() || bake_sunlight))
 							{ //needs normal
-								simple_faces.push_back(facep);
+								if (simple_count < MAX_FACE_COUNT)
+								{
+									simple_faces[simple_count++] = facep;
+								}
 							}
 							else 
 							{ //doesn't need normal
 								facep->setState(LLFace::FULLBRIGHT);
-								fullbright_faces.push_back(facep);
+								if (fullbright_count < MAX_FACE_COUNT)
+								{
+									fullbright_faces[fullbright_count++] = facep;
+								}
 							}
 						}
 					}
@@ -4657,17 +4691,17 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
 	if (batch_textures)
 	{
 		bump_mask |= LLVertexBuffer::MAP_BINORMAL;
-		genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, FALSE, TRUE);
-		genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, FALSE, TRUE);
-		genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, FALSE);
-		genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, TRUE, TRUE);
+		genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, simple_count, FALSE, TRUE);
+		genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, fullbright_count, FALSE, TRUE);
+		genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, bump_count, FALSE, FALSE);
+		genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, alpha_count, TRUE, TRUE);
 	}
 	else
 	{
-		genDrawInfo(group, simple_mask, simple_faces);
-		genDrawInfo(group, fullbright_mask, fullbright_faces);
-		genDrawInfo(group, bump_mask, bump_faces, FALSE, TRUE);
-		genDrawInfo(group, alpha_mask, alpha_faces, TRUE);
+		genDrawInfo(group, simple_mask, simple_faces, simple_count);
+		genDrawInfo(group, fullbright_mask, fullbright_faces, fullbright_count);
+		genDrawInfo(group, bump_mask, bump_faces, bump_count,  FALSE, FALSE);
+		genDrawInfo(group, alpha_mask, alpha_faces, alpha_count, TRUE);
 	}
 	
 
@@ -4699,6 +4733,7 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
 	}
 }
 
+static LLFastTimer::DeclareTimer FTM_REBUILD_MESH_FLUSH("Flush Mesh");
 
 void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
 {
@@ -4708,11 +4743,14 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
 		LLFastTimer ftm(FTM_REBUILD_VOLUME_VB);
 		LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); //make sure getgeometryvolume shows up in the right place in timers
 
-		S32 num_mapped_veretx_buffer = LLVertexBuffer::sMappedCount ;
-
 		group->mBuilt = 1.f;
 		
-		std::set<LLVertexBuffer*> mapped_buffers;
+		S32 num_mapped_vertex_buffer = LLVertexBuffer::sMappedCount ;
+
+		const U32 MAX_BUFFER_COUNT = 4096;
+		LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT];
+
+		U32 buffer_count = 0;
 
 		for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter)
 		{
@@ -4722,7 +4760,7 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
 			{
 				LLVOVolume* vobj = drawablep->getVOVolume();
 				vobj->preRebuild();
-
+				
 				if (drawablep->isState(LLDrawable::ANIMATED_CHILD))
 				{
 					vobj->updateRelativeXform(true);
@@ -4747,9 +4785,9 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
 							}
 
 
-							if (buff->isLocked())
+							if (buff->isLocked() && buffer_count < MAX_BUFFER_COUNT)
 							{
-								mapped_buffers.insert(buff);
+								locked_buffer[buffer_count++] = buff;
 							}
 						}
 					}
@@ -4765,21 +4803,24 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
 			}
 		}
 		
-		for (std::set<LLVertexBuffer*>::iterator iter = mapped_buffers.begin(); iter != mapped_buffers.end(); ++iter)
 		{
-			(*iter)->flush();
-		}
-
-		// don't forget alpha
-		if(group != NULL && 
-		   !group->mVertexBuffer.isNull() && 
-		   group->mVertexBuffer->isLocked())
-		{
-			group->mVertexBuffer->flush();
+			LLFastTimer t(FTM_REBUILD_MESH_FLUSH);
+			for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter)
+			{
+				(*iter)->flush();
+			}
+		
+			// don't forget alpha
+			if(group != NULL && 
+			   !group->mVertexBuffer.isNull() && 
+			   group->mVertexBuffer->isLocked())
+			{
+				group->mVertexBuffer->flush();
+			}
 		}
 
 		//if not all buffers are unmapped
-		if(num_mapped_veretx_buffer != LLVertexBuffer::sMappedCount) 
+		if(num_mapped_vertex_buffer != LLVertexBuffer::sMappedCount) 
 		{
 			llwarns << "Not all mapped vertex buffers are unmapped!" << llendl ; 
 			for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter)
@@ -4839,7 +4880,7 @@ static LLFastTimer::DeclareTimer FTM_GEN_DRAW_INFO_RESIZE_VB("Resize VB");
 
 
 
-void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector<LLFace*>& faces, BOOL distance_sort, BOOL batch_textures)
+void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort, BOOL batch_textures)
 {
 	LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO);
 
@@ -4875,17 +4916,18 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
 		if (!distance_sort)
 		{
 			//sort faces by things that break batches
-			std::sort(faces.begin(), faces.end(), CompareBatchBreakerModified());
+			std::sort(faces, faces+face_count, CompareBatchBreakerModified());
 		}
 		else
 		{
 			//sort faces by distance
-			std::sort(faces.begin(), faces.end(), LLFace::CompareDistanceGreater());
+			std::sort(faces, faces+face_count, LLFace::CompareDistanceGreater());
 		}
 	}
 				
 	bool hud_group = group->isHUDGroup() ;
-	std::vector<LLFace*>::iterator face_iter = faces.begin();
+	LLFace** face_iter = faces;
+	LLFace** end_faces = faces+face_count;
 	
 	LLSpatialGroup::buffer_map_t buffer_map;
 
@@ -4916,7 +4958,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
 
 	bool flexi = false;
 
-	while (face_iter != faces.end())
+	while (face_iter != end_faces)
 	{
 		//pull off next face
 		LLFace* facep = *face_iter;
@@ -4945,10 +4987,13 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
 		flexi = flexi || facep->getViewerObject()->getVolume()->isUnique();
 
 		//sum up vertices needed for this render batch
-		std::vector<LLFace*>::iterator i = face_iter;
+		LLFace** i = face_iter;
 		++i;
 		
-		std::vector<LLViewerTexture*> texture_list;
+		const U32 MAX_TEXTURE_COUNT = 32;
+		LLViewerTexture* texture_list[MAX_TEXTURE_COUNT];
+		
+		U32 texture_count = 0;
 
 		{
 			LLFastTimer t(FTM_GEN_DRAW_INFO_FACE_SIZE);
@@ -4956,12 +5001,15 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
 			{
 				U8 cur_tex = 0;
 				facep->setTextureIndex(cur_tex);
-				texture_list.push_back(tex);
-
+				if (texture_count < MAX_TEXTURE_COUNT)
+				{
+					texture_list[texture_count++] = tex;
+				}
+				
 				if (can_batch_texture(facep))
 				{ //populate texture_list with any textures that can be batched
 				  //move i to the next unbatchable face
-					while (i != faces.end())
+					while (i != end_faces)
 					{
 						facep = *i;
 						
@@ -4976,7 +5024,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
 							if (distance_sort)
 							{ //textures might be out of order, see if texture exists in current batch
 								bool found = false;
-								for (U32 tex_idx = 0; tex_idx < texture_list.size(); ++tex_idx)
+								for (U32 tex_idx = 0; tex_idx < texture_count; ++tex_idx)
 								{
 									if (facep->getTexture() == texture_list[tex_idx])
 									{
@@ -4988,7 +5036,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
 
 								if (!found)
 								{
-									cur_tex = texture_list.size();
+									cur_tex = texture_count;
 								}
 							}
 							else
@@ -5003,7 +5051,10 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
 
 							tex = facep->getTexture();
 
-							texture_list.push_back(tex);
+							if (texture_count < MAX_TEXTURE_COUNT)
+							{
+								texture_list[texture_count++] = tex;
+							}
 						}
 
 						if (geom_count + facep->getGeomCount() > max_vertices)
@@ -5026,7 +5077,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
 			}
 			else
 			{
-				while (i != faces.end() && 
+				while (i != end_faces && 
 					(LLPipeline::sTextureBindTest || (distance_sort || (*i)->getTexture() == tex)))
 				{
 					facep = *i;
-- 
cgit v1.2.3