From e68d5e248f73180def7c8928b32482347dd91de4 Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)"
Date: Wed, 14 Dec 2011 16:18:19 -0500
Subject: SH-2789 WIP - add asserts to check object address alignment where needed

---
 indra/llmath/llvector4a.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index b66b7a7076..49b8754cd0 100644
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -24,6 +24,7 @@
  * $/LicenseInfo$
  */
 
+#include "llmemory.h"
 #include "llmath.h"
 
 #include "llquantize.h"
@@ -189,6 +190,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high )
 	LLVector4a oneOverDelta;
 	{
 		static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f };
+		ll_assert_aligned(F_TWO_4A,16);
+
 		LLVector4a two; two.load4a( F_TWO_4A );
 
 		// Here we use _mm_rcp_ps plus one round of newton-raphson
--
cgit v1.2.3


From 9c2e0d84f84fe7c38b1e9f7a127efc540b43f5aa Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)"
Date: Mon, 19 Dec 2011 18:17:18 -0500
Subject: SH-2789 WIP - various fixes to force 16-byte alignment

---
 indra/llmath/llvector4a.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 indra/llmath/llvector4a.cpp

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
old mode 100644
new mode 100755
index 49b8754cd0..7602ef0cb2
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,11 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
+	memcpy((void*)dst,(const void*)src,bytes);
+#if 0
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
 	assert((bytes % sizeof(F32))== 0);
-
+	ll_assert_aligned(src,16);
+	ll_assert_aligned(dst,16);
+
 	F32* end = dst + (bytes / sizeof(F32) );
 
 	if (bytes > 64)
@@ -87,6 +91,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 		dst += 4;
 		src += 4;
 	}
+#endif
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )
--
cgit v1.2.3


From c8682722ad6b889b13ce288c417cb6b82ed273ac Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)"
Date: Wed, 21 Dec 2011 17:02:47 -0500
Subject: SH-2789 WIP - aligned alloc and realloc

---
 indra/llmath/llvector4a.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 7602ef0cb2..480ccf4ed9 100755
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,14 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-	memcpy((void*)dst,(const void*)src,bytes);
-#if 0
+//	memcpy((void*)dst,(const void*)src,bytes);
+#if 1
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
 	assert((bytes % sizeof(F32))== 0);
 	ll_assert_aligned(src,16);
 	ll_assert_aligned(dst,16);
+	assert(bytes%16==0);
 
 	F32* end = dst + (bytes / sizeof(F32) );
 
--
cgit v1.2.3


From 1435a8b9e6203911d2ebe9e3ba217f8eb20e3140 Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)"
Date: Wed, 4 Jan 2012 15:21:23 -0500
Subject: SH-2789 WIP - stricter calling of memcpyNonAliased16

---
 indra/llmath/llvector4a.cpp | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 480ccf4ed9..6edeb0fefe 100755
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,8 +41,6 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-//	memcpy((void*)dst,(const void*)src,bytes);
-#if 1
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
@@ -92,7 +90,6 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 		dst += 4;
 		src += 4;
 	}
-#endif
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )
--
cgit v1.2.3


From 6ac6736994240d9789a81bf585468bef50805fd8 Mon Sep 17 00:00:00 2001
From: Graham Madarasz
Date: Mon, 11 Mar 2013 16:00:25 -0700
Subject: Move 16b aligned memcpy and alignment utilities to llmem in llcommon for easier use elsewhere

---
 indra/llmath/llvector4a.cpp | 50 +--------------------------------------------
 1 file changed, 1 insertion(+), 49 deletions(-)

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 6edeb0fefe..570fa41a43 100644
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-	assert(src != NULL);
-	assert(dst != NULL);
-	assert(bytes > 0);
-	assert((bytes % sizeof(F32))== 0);
-	ll_assert_aligned(src,16);
-	ll_assert_aligned(dst,16);
-	assert(bytes%16==0);
-
-	F32* end = dst + (bytes / sizeof(F32) );
-
-	if (bytes > 64)
-	{
-		F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
-
-		//at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies
-		F32* end_64 = end-16;
-
-		_mm_prefetch((char*)begin_64, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
-
-		while (dst < begin_64)
-		{
-			copy4a(dst, src);
-			dst += 4;
-			src += 4;
-		}
-
-		while (dst < end_64)
-		{
-			_mm_prefetch((char*)src + 512, _MM_HINT_NTA);
-			_mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
-			copy4a(dst, src);
-			copy4a(dst+4, src+4);
-			copy4a(dst+8, src+8);
-			copy4a(dst+12, src+12);
-
-			dst += 16;
-			src += 16;
-		}
-	}
-
-	while (dst < end)
-	{
-		copy4a(dst, src);
-		dst += 4;
-		src += 4;
-	}
+	ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes);
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )
--
cgit v1.2.3


From bf6182daa8b4d7cea79310547f71d7a3155e17b0 Mon Sep 17 00:00:00 2001
From: Graham Madarasz
Date: Fri, 29 Mar 2013 07:50:08 -0700
Subject: Update Mac and Windows breakpad builds to latest

---
 indra/llmath/llvector4a.cpp | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 indra/llmath/llvector4a.cpp

(limited to 'indra/llmath/llvector4a.cpp')

diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
old mode 100644
new mode 100755
--
cgit v1.2.3
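
For context on the first patch: F_TWO_4A must land on a 16-byte boundary for the aligned load4a() that follows, which is why the commit pairs the LL_ALIGN_16() declaration with an ll_assert_aligned() check. Below is a minimal stand-alone illustration of that pairing, using standard alignas(16) in place of the viewer's macro; it is an approximation for readers outside the viewer tree, not the LL_ALIGN_16 definition itself.

#include <cassert>
#include <cstdint>

// alignas(16) plays the role of the viewer's LL_ALIGN_16() macro here; the
// real macro wraps compiler-specific attributes, so this is only a sketch of
// what the quantize16() hunk above relies on.
alignas(16) static const float EXAMPLE_TWO_4A[4] = { 2.f, 2.f, 2.f, 2.f };

int main()
{
    // Mirrors the ll_assert_aligned(F_TWO_4A, 16) check added in the first patch:
    // the array's address must be a multiple of 16 before it is used in an
    // aligned SSE load.
    assert(reinterpret_cast<std::uintptr_t>(EXAMPLE_TWO_4A) % 16 == 0);
    return 0;
}

If the alignment attribute is ever lost (for example, the data is copied into unaligned storage), the runtime assert is what catches it, which is the point of the checks SH-2789 adds.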
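
The later patches converge on a simple contract for memcpyNonAliased16 (ultimately ll_memcpy_nonaliased_aligned_16 in llcommon): source and destination are both 16-byte aligned, the ranges do not alias, and the byte count is a positive multiple of 16. The sketch below restates that contract with hypothetical names (example_assert_aligned, example_memcpy_aligned_16) and a caller that satisfies it via _mm_malloc; it illustrates the preconditions, not the viewer's actual implementation.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <xmmintrin.h>  // SSE intrinsics; also pulls in _mm_malloc/_mm_free on
                        // GCC/Clang (MSVC declares _mm_malloc in <malloc.h>)

// Hypothetical stand-in for ll_assert_aligned(): the address must be a
// multiple of the requested alignment.
inline void example_assert_aligned(const void* ptr, std::uintptr_t alignment)
{
    assert(reinterpret_cast<std::uintptr_t>(ptr) % alignment == 0);
}

// Illustrative 16-byte-aligned, non-aliasing copy: asserts the preconditions
// the SH-2789 patches added, then moves four floats per iteration with one
// aligned 128-bit load/store.
inline void example_memcpy_aligned_16(float* __restrict dst,
                                      const float* __restrict src,
                                      std::size_t bytes)
{
    assert(src != nullptr);
    assert(dst != nullptr);
    assert(bytes > 0);
    assert(bytes % 16 == 0);
    example_assert_aligned(src, 16);
    example_assert_aligned(dst, 16);

    const float* end = src + bytes / sizeof(float);
    while (src < end)
    {
        _mm_store_ps(dst, _mm_load_ps(src));
        dst += 4;
        src += 4;
    }
}

int main()
{
    const std::size_t count = 1024;                  // number of floats
    const std::size_t bytes = count * sizeof(float); // 4096: a multiple of 16

    // 16-byte-aligned allocations keep the asserts above happy; the viewer
    // uses its own ll_aligned_malloc_16-style helpers for the same purpose.
    float* src = static_cast<float*>(_mm_malloc(bytes, 16));
    float* dst = static_cast<float*>(_mm_malloc(bytes, 16));

    std::memset(src, 0, bytes);   // stand-in for real vertex/matrix data
    example_memcpy_aligned_16(dst, src, bytes);

    _mm_free(dst);
    _mm_free(src);
    return 0;
}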