diff options
Diffstat (limited to 'indra/llmath')
-rw-r--r-- | indra/llmath/llsimdmath.h | 28 | ||||
-rw-r--r-- | indra/llmath/llvector4a.cpp | 50 | ||||
-rw-r--r-- | indra/llmath/llvector4a.h | 4 | ||||
-rw-r--r-- | indra/llmath/llvolume.cpp | 104 | ||||
-rw-r--r-- | indra/llmath/llvolume.h | 21 |
5 files changed, 14 insertions, 193 deletions
diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..cebd2ace7d 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include <stdint.h> #endif -template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast<T*>( - (reinterpret_cast<uintptr_t>(address) + 0xF) & ~0xF); -} - -template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast<T*>( - (reinterpret_cast<uintptr_t>(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include <xmmintrin.h> #include <emmintrin.h> diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..570fa41a43 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 0526793d3a..1a478bc8de 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -93,7 +93,11 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary +// This assert is causing spurious referenced before set warnings on GCC 4.3.4 +// +#if !LL_LINUX ll_assert_aligned(this,16); +#endif } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 9fc72fd801..edd16b5688 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -96,93 +96,6 @@ extern BOOL gDebugGL; bool less_than_max_mag(const LLVector4a& vec); -template <class T, U32 alignment> -LLAlignedArray<T, alignment>::LLAlignedArray() -{ - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template <class T, U32 alignment> -LLAlignedArray<T, alignment>::~LLAlignedArray() -{ - ll_aligned_free(mArray); - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template <class T, U32 alignment> -void LLAlignedArray<T, alignment>::push_back(const T& elem) -{ - T* old_buf = NULL; - if (mCapacity <= mElementCount) - { - mCapacity++; - mCapacity *= 2; - T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - } - old_buf = mArray; - mArray = new_buf; - } - - mArray[mElementCount++] = elem; - - //delete old array here to prevent error on a.push_back(a[0]) - ll_aligned_free(old_buf); -} - -template <class T, U32 alignment> -void LLAlignedArray<T, alignment>::resize(U32 size) -{ - if (mCapacity < size) - { - mCapacity = size+mCapacity*2; - T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - ll_aligned_free(mArray); - } - - /*for (U32 i = mElementCount; i < mCapacity; ++i) - { - new(new_buf+i) T(); - }*/ - mArray = new_buf; - } - - mElementCount = size; -} - - -template <class T, U32 alignment> -T& LLAlignedArray<T, alignment>::operator[](int idx) -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template <class T, U32 alignment> -const T& LLAlignedArray<T, alignment>::operator[](int idx) const -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template <class T, U32 alignment> -T* LLAlignedArray<T, alignment>::append(S32 N) -{ - U32 sz = size(); - resize(sz+N); - return &((*this)[sz]); -} - - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -4816,10 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + // disallow data amplification + // + if (new_face.mNumVertices <= mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; @@ -5400,12 +5316,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) S32 max_t = volume->getPath().mPath.size(); // S32 i; - S32 num_vertices = 0, num_indices = 0; S32 grid_size = (profile.size()-1)/4; - S32 quad_count = (grid_size * grid_size); - - num_vertices = (grid_size+1)*(grid_size+1); - num_indices = quad_count * 4; LLVector4a& min = mExtents[0]; LLVector4a& max = mExtents[1]; @@ -6822,3 +6733,4 @@ void calc_binormal_from_triangle(LLVector4a& binormal, binormal.set( 0, 1 , 0 ); } } + diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 5e43af92ec..6b599a4126 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -57,6 +57,7 @@ class LLVolumeTriangle; #include "llrefcount.h" #include "llpointer.h" #include "llfile.h" +#include "llalignedarray.h" //============================================================================ @@ -195,26 +196,6 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; -template <class T, U32 alignment> -class LLAlignedArray -{ -public: - T* mArray; - U32 mElementCount; - U32 mCapacity; - - LLAlignedArray(); - ~LLAlignedArray(); - - void push_back(const T& elem); - U32 size() const { return mElementCount; } - void resize(U32 size); - T* append(S32 N); - T& operator[](int idx); - const T& operator[](int idx) const; -}; - - class LLProfileParams { public: |