Diffstat (limited to 'indra')
-rw-r--r--   indra/llcommon/llmemory.h   | 78
-rw-r--r--   indra/llmath/llvector4a.h   |  4
-rw-r--r--   indra/llmath/llvolume.cpp   |  8
3 files changed, 10 insertions, 80 deletions
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index d0e4bc9e25..528af83b8f 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -182,78 +182,6 @@ inline void ll_aligned_free_32(void *p)
 #endif
 }
 
-
-// Copy words 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP.
-// Source and dest must be 16-byte aligned and size must be multiple of 16.
-//
-inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes)
-{
-	llassert(src != NULL);
-	llassert(dst != NULL);
-	llassert(bytes >= 16);
-	llassert((bytes % sizeof(F32))== 0);
-	llassert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src));
-	llassert(bytes%16==0);
-	ll_assert_aligned(src,16);
-	ll_assert_aligned(dst,16);
-
-	char* end = dst + bytes;
-
-	if (bytes > 64)
-	{
-
-		// Find start of 64b aligned area within block
-		//
-		void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
-
-		//at least 64 bytes before the end of the destination, switch to 16 byte copies
-		void* end_64 = end-64;
-
-		// Prefetch the head of the 64b area now
-		//
-		_mm_prefetch((char*)begin_64, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
-		_mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
-
-		// Copy 16b chunks until we're 64b aligned
-		//
-		while (dst < begin_64)
-		{
-
-			_mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
-			dst += 16;
-			src += 16;
-		}
-
-		// Copy 64b chunks up to your tail
-		//
-		// might be good to shmoo the 512b prefetch offset
-		// (characterize performance for various values)
-		//
-		while (dst < end_64)
-		{
-			_mm_prefetch((char*)src + 512, _MM_HINT_NTA);
-			_mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
-			_mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
-			_mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16)));
-			_mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32)));
-			_mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48)));
-			dst += 64;
-			src += 64;
-		}
-	}
-
-	// Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies)
-	//
-	while (dst < end)
-	{
-		_mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
-		dst += 16;
-		src += 16;
-	}
-}
-
 #ifndef __DEBUG_PRIVATE_MEM__
 #define __DEBUG_PRIVATE_MEM__ 0
 #endif
@@ -662,7 +590,13 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr)
 
 // LLSingleton moved to llsingleton.h
 
+LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
+#ifdef SHOW_ASSERT
+#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
+#else
+#define ll_assert_aligned(ptr,alignment)
+#endif
 
 
 
 #endif
diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 1a478bc8de..0526793d3a 100644
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -93,11 +93,7 @@ public:
 
 	LLVector4a()
 	{ //DO NOT INITIALIZE -- The overhead is completely unnecessary
-// This assert is causing spurious referenced before set warnings on GCC 4.3.4
-//
-#if !LL_LINUX
 		ll_assert_aligned(this,16);
-#endif
 	}
 
 	LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f)
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index edd16b5688..602f2c29e5 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -4729,13 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
 		}
 	}
 
-	// disallow data amplification
+	// Only swap data if we've actually optimized the mesh
 	//
 	if (new_face.mNumVertices <= mNumVertices)
 	{
-		llassert(new_face.mNumIndices == mNumIndices);
-		swapData(new_face);
-	}
+		llassert(new_face.mNumIndices == mNumIndices);
+		swapData(new_face);
+	}
 }
 
 class LLVCacheTriangleData;
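Note on the new alignment hook: the ll_assert_aligned macro added to llmemory.h expands to a call to ll_assert_aligned_func only when SHOW_ASSERT is defined, so release builds pay nothing for the check. The definition of ll_assert_aligned_func is not part of this diff (it presumably lives in llmemory.cpp). The snippet below is only a minimal standalone sketch of what such a checker could look like, using fprintf/abort in place of the viewer's own logging and a plain unsigned int in place of the viewer's U32 typedef.

// Hypothetical sketch -- NOT the viewer's actual implementation, which is
// defined outside this diff. Shown only to illustrate the intended check.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

void ll_assert_aligned_func(std::uintptr_t ptr, unsigned int alignment)
{
    // An address is 'alignment'-byte aligned when it is an exact multiple of it.
    if (alignment == 0 || (ptr % alignment) != 0)
    {
        std::fprintf(stderr,
                     "Alignment check failed: address 0x%llx is not %u-byte aligned\n",
                     static_cast<unsigned long long>(ptr), alignment);
        std::abort();
    }
}

int main()
{
    // Mirrors the debug-build expansion of ll_assert_aligned(this, 16)
    // in the LLVector4a constructor: the check passes for 16-byte storage.
    alignas(16) float v[4] = {0.f, 0.f, 0.f, 0.f};
    ll_assert_aligned_func(reinterpret_cast<std::uintptr_t>(v), 16);
    return 0;
}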