From e68d5e248f73180def7c8928b32482347dd91de4 Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)"
Date: Wed, 14 Dec 2011 16:18:19 -0500
Subject: SH-2789 WIP - add asserts to check object address alignment where needed

---
 indra/llmath/llvector4a.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index b66b7a7076..49b8754cd0 100644
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -24,6 +24,7 @@
  * $/LicenseInfo$
  */
 
+#include "llmemory.h"
 #include "llmath.h"
 
 #include "llquantize.h"
@@ -189,6 +190,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high )
 	LLVector4a oneOverDelta;
 	{
 		static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f };
+		ll_assert_aligned(F_TWO_4A,16);
+
 		LLVector4a two; two.load4a( F_TWO_4A );
 
 		// Here we use _mm_rcp_ps plus one round of newton-raphson
--
cgit v1.2.3


From 9c2e0d84f84fe7c38b1e9f7a127efc540b43f5aa Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)"
Date: Mon, 19 Dec 2011 18:17:18 -0500
Subject: SH-2789 WIP - various fixes to force 16-byte alignment

---
 indra/llmath/llvector4a.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 indra/llmath/llvector4a.cpp

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
old mode 100644
new mode 100755
index 49b8754cd0..7602ef0cb2
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,11 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
+	memcpy((void*)dst,(const void*)src,bytes);
+#if 0
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
 	assert((bytes % sizeof(F32))== 0);
-
+	ll_assert_aligned(src,16);
+	ll_assert_aligned(dst,16);
+
 	F32* end = dst + (bytes / sizeof(F32) );
 
 	if (bytes > 64)
@@ -87,6 +91,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 		dst += 4;
 		src += 4;
 	}
+#endif
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )
--
cgit v1.2.3


From c8682722ad6b889b13ce288c417cb6b82ed273ac Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)"
Date: Wed, 21 Dec 2011 17:02:47 -0500
Subject: SH-2789 WIP - aligned alloc and realloc

---
 indra/llmath/llvector4a.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 7602ef0cb2..480ccf4ed9 100755
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,14 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-	memcpy((void*)dst,(const void*)src,bytes);
-#if 0
+//	memcpy((void*)dst,(const void*)src,bytes);
+#if 1
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
 	assert((bytes % sizeof(F32))== 0);
 	ll_assert_aligned(src,16);
 	ll_assert_aligned(dst,16);
+	assert(bytes%16==0);
 
 	F32* end = dst + (bytes / sizeof(F32) );
 
--
cgit v1.2.3


From 1435a8b9e6203911d2ebe9e3ba217f8eb20e3140 Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)"
Date: Wed, 4 Jan 2012 15:21:23 -0500
Subject: SH-2789 WIP - stricter calling of memcpyNonAliased16

---
 indra/llmath/llvector4a.cpp | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 480ccf4ed9..6edeb0fefe 100755
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,8 +41,6 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-//	memcpy((void*)dst,(const void*)src,bytes);
-#if 1
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
@@ -92,7 +90,6 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 		dst += 4;
 		src += 4;
 	}
-#endif
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )
--
cgit v1.2.3


From 6ac6736994240d9789a81bf585468bef50805fd8 Mon Sep 17 00:00:00 2001
From: Graham Madarasz
Date: Mon, 11 Mar 2013 16:00:25 -0700
Subject: Move 16b aligned memcpy and alignment utilities to llmem in llcommon for easier use elsewhere

---
 indra/llmath/llvector4a.cpp | 50 +--------------------------------------------
 1 file changed, 1 insertion(+), 49 deletions(-)

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 6edeb0fefe..570fa41a43 100644
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-	assert(src != NULL);
-	assert(dst != NULL);
-	assert(bytes > 0);
-	assert((bytes % sizeof(F32))== 0);
-	ll_assert_aligned(src,16);
-	ll_assert_aligned(dst,16);
-	assert(bytes%16==0);
-
-	F32* end = dst + (bytes / sizeof(F32) );
-
-	if (bytes > 64)
-	{
-		F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
-
-		//at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies
-		F32* end_64 = end-16;
-
-		_mm_prefetch((char*)begin_64, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
-
-		while (dst < begin_64)
-		{
-			copy4a(dst, src);
-			dst += 4;
-			src += 4;
-		}
-
-		while (dst < end_64)
-		{
-			_mm_prefetch((char*)src + 512, _MM_HINT_NTA);
-			_mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
-			copy4a(dst, src);
-			copy4a(dst+4, src+4);
-			copy4a(dst+8, src+8);
-			copy4a(dst+12, src+12);
-
-			dst += 16;
-			src += 16;
-		}
-	}
-
-	while (dst < end)
-	{
-		copy4a(dst, src);
-		dst += 4;
-		src += 4;
-	}
+	ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes);
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )
--
cgit v1.2.3


From bf6182daa8b4d7cea79310547f71d7a3155e17b0 Mon Sep 17 00:00:00 2001
From: Graham Madarasz
Date: Fri, 29 Mar 2013 07:50:08 -0700
Subject: Update Mac and Windows breakpad builds to latest

---
 indra/llmath/llvector4a.cpp | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 indra/llmath/llvector4a.cpp

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
old mode 100644
new mode 100755
--
cgit v1.2.3
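
For context on the first patch: F_TWO_4A must land on a 16-byte boundary for the aligned load4a() that follows, which is why the commit pairs the LL_ALIGN_16() declaration with an ll_assert_aligned() check. Below is a minimal stand-alone illustration of that pairing, using standard alignas(16) in place of the viewer's macro; it is an approximation for readers outside the viewer tree, not the LL_ALIGN_16 definition itself.

#include <cassert>
#include <cstdint>

// alignas(16) plays the role of the viewer's LL_ALIGN_16() macro here; the
// real macro wraps compiler-specific attributes, so this is only a sketch of
// what the quantize16() hunk above relies on.
alignas(16) static const float EXAMPLE_TWO_4A[4] = { 2.f, 2.f, 2.f, 2.f };

int main()
{
    // Mirrors the ll_assert_aligned(F_TWO_4A, 16) check added in the first patch:
    // the array's address must be a multiple of 16 before it is used in an
    // aligned SSE load.
    assert(reinterpret_cast<std::uintptr_t>(EXAMPLE_TWO_4A) % 16 == 0);
    return 0;
}

If the alignment attribute is ever lost (for example, the data is copied into unaligned storage), the runtime assert is what catches it, which is the point of the checks SH-2789 adds.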
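
The later patches converge on a simple contract for memcpyNonAliased16 (ultimately ll_memcpy_nonaliased_aligned_16 in llcommon): source and destination are both 16-byte aligned, the ranges do not alias, and the byte count is a positive multiple of 16. The sketch below restates that contract with hypothetical names (example_assert_aligned, example_memcpy_aligned_16) and a caller that satisfies it via _mm_malloc; it illustrates the preconditions, not the viewer's actual implementation.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <xmmintrin.h>  // SSE intrinsics; also pulls in _mm_malloc/_mm_free on
                        // GCC/Clang (MSVC declares _mm_malloc in <malloc.h>)

// Hypothetical stand-in for ll_assert_aligned(): the address must be a
// multiple of the requested alignment.
inline void example_assert_aligned(const void* ptr, std::uintptr_t alignment)
{
    assert(reinterpret_cast<std::uintptr_t>(ptr) % alignment == 0);
}

// Illustrative 16-byte-aligned, non-aliasing copy: asserts the preconditions
// the SH-2789 patches added, then moves four floats per iteration with one
// aligned 128-bit load/store.
inline void example_memcpy_aligned_16(float* __restrict dst,
                                      const float* __restrict src,
                                      std::size_t bytes)
{
    assert(src != nullptr);
    assert(dst != nullptr);
    assert(bytes > 0);
    assert(bytes % 16 == 0);
    example_assert_aligned(src, 16);
    example_assert_aligned(dst, 16);

    const float* end = src + bytes / sizeof(float);
    while (src < end)
    {
        _mm_store_ps(dst, _mm_load_ps(src));
        dst += 4;
        src += 4;
    }
}

int main()
{
    const std::size_t count = 1024;                  // number of floats
    const std::size_t bytes = count * sizeof(float); // 4096: a multiple of 16

    // 16-byte-aligned allocations keep the asserts above happy; the viewer
    // uses its own ll_aligned_malloc_16-style helpers for the same purpose.
    float* src = static_cast<float*>(_mm_malloc(bytes, 16));
    float* dst = static_cast<float*>(_mm_malloc(bytes, 16));

    std::memset(src, 0, bytes);   // stand-in for real vertex/matrix data
    example_memcpy_aligned_16(dst, src, bytes);

    _mm_free(dst);
    _mm_free(src);
    return 0;
}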