From 9c2e0d84f84fe7c38b1e9f7a127efc540b43f5aa Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Mon, 19 Dec 2011 18:17:18 -0500 Subject: SH-2789 WIP - various fixes to force 16-byte alignment --- indra/llmath/lloctree.h | 4 +- indra/llmath/llvector4a.cpp | 7 +++- indra/llmath/llvolumeoctree.h | 14 ++++++- indra/llmath/tests/alignment_test.cpp | 74 +++++++++++++++-------------------- 4 files changed, 52 insertions(+), 47 deletions(-) mode change 100644 => 100755 indra/llmath/llvector4a.cpp mode change 100644 => 100755 indra/llmath/llvolumeoctree.h (limited to 'indra/llmath') diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 3c1ae45d68..374858be51 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -88,7 +88,7 @@ public: typedef LLOctreeNode oct_node; typedef LLOctreeListener oct_listener; - /*void* operator new(size_t size) + void* operator new(size_t size) { return ll_aligned_malloc_16(size); } @@ -96,7 +96,7 @@ public: void operator delete(void* ptr) { ll_aligned_free_16(ptr); - }*/ + } LLOctreeNode( const LLVector4a& center, const LLVector4a& size, diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp old mode 100644 new mode 100755 index 49b8754cd0..7602ef0cb2 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,11 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { + memcpy((void*)dst,(const void*)src,bytes); +#if 0 assert(src != NULL); assert(dst != NULL); assert(bytes > 0); assert((bytes % sizeof(F32))== 0); - + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + F32* end = dst + (bytes / sizeof(F32) ); if (bytes > 64) @@ -87,6 +91,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F dst += 4; src += 4; } +#endif } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h old mode 100644 new mode 100755 index 688d91dc40..6de7b223be --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -73,6 +73,16 @@ class LLVolumeOctreeListener : public LLOctreeListener { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeOctreeListener(LLOctreeNode* node); ~LLVolumeOctreeListener(); @@ -99,8 +109,8 @@ public: public: - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children }; class LLOctreeTriangleRayIntersect : public LLOctreeTraveler diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp index 6b4a454fd2..51a7051e69 100755 --- a/indra/llmath/tests/alignment_test.cpp +++ b/indra/llmath/tests/alignment_test.cpp @@ -34,10 +34,21 @@ #include "../llsimdmath.h" #include "../llvector4a.h" +void* operator new(size_t size) +{ + return ll_aligned_malloc_16(size); +} + +void operator delete(void *p) +{ + ll_aligned_free_16(p); +} + namespace tut { #define is_aligned(ptr,alignment) ((reinterpret_cast(ptr))%(alignment)==0) +#define is_aligned_relative(ptr,base_ptr,alignment) ((reinterpret_cast(ptr)-reinterpret_cast(base_ptr))%(alignment)==0) struct alignment_test {}; @@ -51,38 +62,40 @@ class MyVector4a LLQuad mQ; } LL_ALIGN_POSTFIX(16); -LL_ALIGN_PREFIX(64) -class MyBigBlob +// Verify that aligned allocators perform as advertised. +template<> template<> +void alignment_test_object_t::test<1>() { -public: - ~MyBigBlob() {} -private: - LLQuad mQ[4]; -} LL_ALIGN_POSTFIX(64); + const int num_tests = 7; + void *align_ptr; + for (int i=0; i template<> -void alignment_test_object_t::test<1>() +void alignment_test_object_t::test<2>() { ensure("LLAlignment reality is broken: ", (1==1)); MyVector4a vec1; ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16)); - MyBigBlob bb1; - ensure("LLAlignment bb1 unaligned", is_aligned(&bb1,64)); - - MyVector4a veca[12]; ensure("LLAlignment veca unaligned", is_aligned(veca,16)); - - MyBigBlob bba[12]; - ensure("LLAlignment bba unaligned", is_aligned(bba,64)); } // Heap allocation of objects and arrays. template<> template<> -void alignment_test_object_t::test<2>() +void alignment_test_object_t::test<3>() { const int ARR_SIZE = 7; for(int i=0; i() } MyVector4a *veca = new MyVector4a[ARR_SIZE]; + ensure("LLAligment veca base", is_aligned(veca,16)); for(int i=0; i~MyBigBlob(); - _aligned_free(aligned_addr); - } - - ensure("LLAlignment big blob size",sizeof(MyBigBlob)==64); - void *aligned_addr = _aligned_malloc(ARR_SIZE*sizeof(MyBigBlob),64); - MyBigBlob *bba = new(aligned_addr) MyBigBlob[ARR_SIZE]; - std::cout << "aligned_addr " << aligned_addr << std::endl; - std::cout << "bba " << bba << std::endl; - for(int i=0; i