5 files changed, 14 insertions, 193 deletions
diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h
index 01458521ec..cebd2ace7d 100644
--- a/indra/llmath/llsimdmath.h
+++ b/indra/llmath/llsimdmath.h
@@ -39,34 +39,6 @@
 #include <stdint.h>
 #endif
 
-template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address) 
-{ 
-	return reinterpret_cast<T*>(
-		(reinterpret_cast<uintptr_t>(address) + 0xF) & ~0xF);
-}
-
-template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) 
-{ 
-	return reinterpret_cast<T*>(
-		(reinterpret_cast<uintptr_t>(address) + 0x3F) & ~0x3F);
-}
-
-#if LL_LINUX || LL_DARWIN
-
-#define			LL_ALIGN_PREFIX(x)
-#define			LL_ALIGN_POSTFIX(x)		__attribute__((aligned(x)))
-
-#elif LL_WINDOWS
-
-#define			LL_ALIGN_PREFIX(x)		__declspec(align(x))
-#define			LL_ALIGN_POSTFIX(x)
-
-#else
-#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined"
-#endif
-
-#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)
-
 #include <xmmintrin.h>
 #include <emmintrin.h>
 
diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 6edeb0fefe..570fa41a43 100644
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-	assert(src != NULL);
-	assert(dst != NULL);
-	assert(bytes > 0);
-	assert((bytes % sizeof(F32))== 0); 
-	ll_assert_aligned(src,16);
-	ll_assert_aligned(dst,16);
-	assert(bytes%16==0);
-
-	F32* end = dst + (bytes / sizeof(F32) );
-
-	if (bytes > 64)
-	{
-		F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
-		
-		//at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies
-		F32* end_64 = end-16;
-		
-		_mm_prefetch((char*)begin_64, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
-		
-		while (dst < begin_64)
-		{
-			copy4a(dst, src);
-			dst += 4;
-			src += 4;
-		}
-		
-		while (dst < end_64)
-		{
-			_mm_prefetch((char*)src + 512, _MM_HINT_NTA);
-			_mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
-			copy4a(dst, src);
-			copy4a(dst+4, src+4);
-			copy4a(dst+8, src+8);
-			copy4a(dst+12, src+12);
-			
-			dst += 16;
-			src += 16;
-		}
-	}
-
-	while (dst < end)
-	{
-		copy4a(dst, src);
-		dst += 4;
-		src += 4;
-	}
+        ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes);
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )
diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 0526793d3a..1a478bc8de 100644
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -93,7 +93,11 @@ public:
 	
 	LLVector4a()
 	{ //DO NOT INITIALIZE -- The overhead is completely unnecessary
+// This assert is causing spurious referenced before set warnings on GCC 4.3.4
+//
+#if !LL_LINUX
 		ll_assert_aligned(this,16);
+#endif
 	}
 	
 	LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f)
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 9fc72fd801..edd16b5688 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -96,93 +96,6 @@ extern BOOL gDebugGL;
 
 bool less_than_max_mag(const LLVector4a& vec);
 
-template <class T, U32 alignment>
-LLAlignedArray<T, alignment>::LLAlignedArray()
-{
-	mArray = NULL;
-	mElementCount = 0;
-	mCapacity = 0;
-}
-
-template <class T, U32 alignment>
-LLAlignedArray<T, alignment>::~LLAlignedArray()
-{
-	ll_aligned_free(mArray);
-	mArray = NULL;
-	mElementCount = 0;
-	mCapacity = 0;
-}
-
-template <class T, U32 alignment>
-void LLAlignedArray<T, alignment>::push_back(const T& elem)
-{
-	T* old_buf = NULL;
-	if (mCapacity <= mElementCount)
-	{
-		mCapacity++;
-		mCapacity *= 2;
-		T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment);
-		if (mArray)
-		{
-			LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount);
-		}
-		old_buf = mArray;
-		mArray = new_buf;
-	}
-
-	mArray[mElementCount++] = elem;
-
-	//delete old array here to prevent error on a.push_back(a[0])
-	ll_aligned_free(old_buf);
-}
-
-template <class T, U32 alignment>
-void LLAlignedArray<T, alignment>::resize(U32 size)
-{
-	if (mCapacity < size)
-	{
-		mCapacity = size+mCapacity*2;
-		T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL;
-		if (mArray)
-		{
-			LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount);
-			ll_aligned_free(mArray);
-		}
-
-		/*for (U32 i = mElementCount; i < mCapacity; ++i)
-		{
-			new(new_buf+i) T();
-		}*/
-		mArray = new_buf;
-	}
-
-	mElementCount = size;
-}
-
-
-template <class T, U32 alignment>
-T& LLAlignedArray<T, alignment>::operator[](int idx)
-{
-	llassert(idx < mElementCount);
-	return mArray[idx];
-}
-
-template <class T, U32 alignment>
-const T& LLAlignedArray<T, alignment>::operator[](int idx) const
-{
-	llassert(idx < mElementCount);
-	return mArray[idx];
-}
-
-template <class T, U32 alignment>
-T* LLAlignedArray<T, alignment>::append(S32 N)
-{
-	U32 sz = size();
-	resize(sz+N);
-	return &((*this)[sz]);
-}
-
-
 BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
 {    
 	LLVector3 test = (pt2-pt1)%(pt3-pt2);
@@ -4816,10 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
 		}
 	}
 
-	llassert(new_face.mNumIndices == mNumIndices);
-	llassert(new_face.mNumVertices <= mNumVertices);
-
-	swapData(new_face);
+	// disallow data amplification
+	//
+	if (new_face.mNumVertices <= mNumVertices)
+	{
+	    llassert(new_face.mNumIndices == mNumIndices);
+	    swapData(new_face);
+    }
 }
 
 class LLVCacheTriangleData;
@@ -5400,12 +5316,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 	S32 max_t = volume->getPath().mPath.size();
 
 	// S32 i;
-	S32 num_vertices = 0, num_indices = 0;
 	S32	grid_size = (profile.size()-1)/4;
-	S32	quad_count = (grid_size * grid_size);
-
-	num_vertices = (grid_size+1)*(grid_size+1);
-	num_indices = quad_count * 4;
 
 	LLVector4a& min = mExtents[0];
 	LLVector4a& max = mExtents[1];
@@ -6822,3 +6733,4 @@ void calc_binormal_from_triangle(LLVector4a& binormal,
 		binormal.set( 0, 1 , 0 );
 	}
 }
+
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 5e43af92ec..6b599a4126 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -57,6 +57,7 @@ class LLVolumeTriangle;
 #include "llrefcount.h"
 #include "llpointer.h"
 #include "llfile.h"
+#include "llalignedarray.h"
 
 //============================================================================
 
@@ -195,26 +196,6 @@ const U8 LL_SCULPT_FLAG_MIRROR    = 128;
 
 const S32 LL_SCULPT_MESH_MAX_FACES = 8;
 
-template <class T, U32 alignment>
-class LLAlignedArray
-{
-public:
-	T* mArray;
-	U32 mElementCount;
-	U32 mCapacity;
-
-	LLAlignedArray();
-	~LLAlignedArray();
-
-	void push_back(const T& elem);
-	U32 size() const { return mElementCount; }
-	void resize(U32 size);
-	T* append(S32 N);
-	T& operator[](int idx);
-	const T& operator[](int idx) const;
-};
-
-
 class LLProfileParams
 {
 public: