Diffstat (limited to 'indra/llcommon/llmemory.h')
-rwxr-xr-x [-rw-r--r--]  indra/llcommon/llmemory.h | 292
1 file changed, 219 insertions(+), 73 deletions(-)
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index e725bdd9fa..c4c9cc0566 100644..100755
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -27,30 +27,112 @@
#define LLMEMORY_H
#include "linden_common.h"
+#include "llunits.h"
+#if !LL_WINDOWS
+#include <stdint.h>
+#endif
class LLMutex ;
#if LL_WINDOWS && LL_DEBUG
#define LL_CHECK_MEMORY llassert(_CrtCheckMemory());
#else
-#define LL_CHECK_MEMORY
+#define LL_CHECK_MEMORY
#endif
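
LL_CHECK_MEMORY validates the CRT heap only in Windows debug builds and expands
to nothing everywhere else, so it can bracket suspect code at no release cost.
A minimal usage sketch (the function name is illustrative):

void update_buffers()
{
    LL_CHECK_MEMORY  // llassert(_CrtCheckMemory()); on Windows debug, no-op otherwise
    // ... mutate heap-allocated data ...
    LL_CHECK_MEMORY
}
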
-inline void* ll_aligned_malloc( size_t size, int align )
-{
- void* mem = malloc( size + (align - 1) + sizeof(void*) );
- char* aligned = ((char*)mem) + sizeof(void*);
- aligned += align - ((uintptr_t)aligned & (align - 1));
- ((void**)aligned)[-1] = mem;
- return aligned;
+#if LL_WINDOWS
+#define LL_ALIGN_OF __alignof
+#else
+#define LL_ALIGN_OF __alignof__
+#endif
+
+#if LL_WINDOWS
+#define LL_DEFAULT_HEAP_ALIGN 8
+#elif LL_DARWIN
+#define LL_DEFAULT_HEAP_ALIGN 16
+#elif LL_LINUX
+#define LL_DEFAULT_HEAP_ALIGN 8
+#endif
+
+
+LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
+
+#ifdef SHOW_ASSERT
+#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
+#else
+#define ll_assert_aligned(ptr,alignment)
+#endif
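
ll_assert_aligned only calls through to ll_assert_aligned_func when SHOW_ASSERT
is defined; otherwise the macro expands to nothing, so alignment checks cost
nothing in release builds. A hedged sketch (the allocation helpers used here
are the ones declared further down in this header):

F32* verts = (F32*)ll_aligned_malloc_16(12 * sizeof(F32));
ll_assert_aligned(verts, 16);  // compiles out unless SHOW_ASSERT is defined
ll_aligned_free_16(verts);
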
+
+#include <xmmintrin.h>
+
+template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address)
+{
+ return reinterpret_cast<T*>(
+ (reinterpret_cast<uintptr_t>(address) + 0xF) & ~0xF);
}
-inline void ll_aligned_free( void* ptr )
-{
- free( ((void**)ptr)[-1] );
+template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)
+{
+ return reinterpret_cast<T*>(
+ (reinterpret_cast<uintptr_t>(address) + 0x3F) & ~0x3F);
}
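
Both helpers round a pointer up by adding (alignment - 1) and masking off the
low bits, which is only valid for power-of-two alignments. A standalone sketch
of the same arithmetic:

#include <cassert>
#include <cstdint>

int main()
{
    // (addr + 0xF) & ~0xF snaps an address up to the next multiple of 16;
    // an already-aligned address is left where it is.
    uintptr_t addr = 0x1001;
    assert(((addr + 0xF) & ~uintptr_t(0xF)) == 0x1010);
    assert(((uintptr_t(0x1010) + 0xF) & ~uintptr_t(0xF)) == 0x1010);
    return 0;
}
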
+#if LL_LINUX || LL_DARWIN
+
+#define LL_ALIGN_PREFIX(x)
+#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x)))
+
+#elif LL_WINDOWS
+
+#define LL_ALIGN_PREFIX(x) __declspec(align(x))
+#define LL_ALIGN_POSTFIX(x)
+
+#else
+#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined"
+#endif
+
+#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)
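
LL_ALIGN_16 puts the compiler-specific attribute on whichever side the
toolchain expects: prefix for MSVC's __declspec(align(16)), postfix for
GCC/Clang's __attribute__((aligned(16))). A usage sketch with illustrative
names:

// 16-byte-aligned storage, safe for _mm_load_ps / _mm_store_ps:
LL_ALIGN_16(F32 gMatrixData[16]);

struct LLVertexBlock
{
    LL_ALIGN_16(U8 mData[64]);  // members are aligned the same way
};
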
+
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
+	// to enable buffer overrun detection, predefine LL_DEBUG_BUFFER_OVERRUN in the current library
+	// and change the preprocessor condition to: #if 1 && defined(LL_WINDOWS)
+
+#if 0 && defined(LL_WINDOWS)
+ void* ll_aligned_malloc_fallback( size_t size, int align );
+ void ll_aligned_free_fallback( void* ptr );
+//------------------------------------------------------------------------------------------------
+#else
+ inline void* ll_aligned_malloc_fallback( size_t size, int align )
+ {
+ #if defined(LL_WINDOWS)
+ return _aligned_malloc(size, align);
+ #else
+ void* mem = malloc( size + (align - 1) + sizeof(void*) );
+ char* aligned = ((char*)mem) + sizeof(void*);
+ aligned += align - ((uintptr_t)aligned & (align - 1));
+
+ ((void**)aligned)[-1] = mem;
+ return aligned;
+ #endif
+ }
+
+ inline void ll_aligned_free_fallback( void* ptr )
+ {
+ #if defined(LL_WINDOWS)
+ _aligned_free(ptr);
+ #else
+ if (ptr)
+ {
+ free( ((void**)ptr)[-1] );
+ }
+ #endif
+ }
+#endif
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
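
On non-Windows platforms the fallback over-allocates by (align - 1) +
sizeof(void*) bytes, stashes the raw malloc() pointer in the word just below
the aligned address, and reads it back in the free path:

raw block:  [ padding... ][ raw ptr ][ aligned user memory ............ ]
            ^ mem                    ^ aligned, ((void**)aligned)[-1] == mem

A matched alloc/free sketch; allocations made with the fallback must be
released with the fallback, since only it knows where the raw pointer lives:

void* p = ll_aligned_malloc_fallback(100, 64);
ll_assert_aligned(p, 64);
ll_aligned_free_fallback(p);
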
+
#if !LL_USE_TCMALLOC
inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
{
@@ -112,7 +194,7 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi
#if defined(LL_WINDOWS)
return _aligned_malloc(size, 32);
#elif defined(LL_DARWIN)
- return ll_aligned_malloc( size, 32 );
+ return ll_aligned_malloc_fallback( size, 32 );
#else
void *rtn;
if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size)))
@@ -127,12 +209,127 @@ inline void ll_aligned_free_32(void *p)
#if defined(LL_WINDOWS)
_aligned_free(p);
#elif defined(LL_DARWIN)
- ll_aligned_free( p );
+ ll_aligned_free_fallback( p );
#else
free(p); // posix_memalign() is compatible with heap deallocator
#endif
}
+// general purpose dispatch functions that are forced inline so they can compile down to a single call
+template<size_t ALIGNMENT>
+LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)
+{
+ if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)
+ {
+ return malloc(size);
+ }
+ else if (ALIGNMENT == 16)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+ else if (ALIGNMENT == 32)
+ {
+ return ll_aligned_malloc_32(size);
+ }
+ else
+ {
+ return ll_aligned_malloc_fallback(size, ALIGNMENT);
+ }
+}
+
+template<size_t ALIGNMENT>
+LL_FORCE_INLINE void ll_aligned_free(void* ptr)
+{
+ if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)
+ {
+ free(ptr);
+ }
+ else if (ALIGNMENT == 16)
+ {
+ ll_aligned_free_16(ptr);
+ }
+ else if (ALIGNMENT == 32)
+ {
+		ll_aligned_free_32(ptr);
+ }
+ else
+ {
+		ll_aligned_free_fallback(ptr);
+ }
+}
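
Because ALIGNMENT is a compile-time template argument, the if/else ladders
above are resolved by the compiler and each call site folds down to a single
allocator call. A usage sketch; the free alignment must match the malloc:

void* a = ll_aligned_malloc<16>(256);  // one allocator call, chosen at compile time
void* b = ll_aligned_malloc<32>(512);

ll_aligned_free<16>(a);  // must match the allocation alignment
ll_aligned_free<32>(b);
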
+
+// Copy 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP.
+// Source and dest must be 16-byte aligned and size must be multiple of 16.
+//
+inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes)
+{
+ assert(src != NULL);
+ assert(dst != NULL);
+ assert(bytes > 0);
+ assert((bytes % sizeof(F32))== 0);
+ ll_assert_aligned(src,16);
+ ll_assert_aligned(dst,16);
+
+ assert((src < dst) ? ((src + bytes) <= dst) : ((dst + bytes) <= src));
+ assert(bytes%16==0);
+
+ char* end = dst + bytes;
+
+ if (bytes > 64)
+ {
+
+ // Find start of 64b aligned area within block
+ //
+ void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
+
+	// stop the 64b loop 64 bytes before the end of the destination, then finish with 16-byte copies
+ void* end_64 = end-64;
+
+ // Prefetch the head of the 64b area now
+ //
+ _mm_prefetch((char*)begin_64, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
+
+ // Copy 16b chunks until we're 64b aligned
+ //
+ while (dst < begin_64)
+ {
+
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ dst += 16;
+ src += 16;
+ }
+
+	// Copy 64b chunks up to the 64b-aligned tail
+ //
+ // might be good to shmoo the 512b prefetch offset
+ // (characterize performance for various values)
+ //
+ while (dst < end_64)
+ {
+ _mm_prefetch((char*)src + 512, _MM_HINT_NTA);
+ _mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16)));
+ _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32)));
+ _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48)));
+ dst += 64;
+ src += 64;
+ }
+ }
+
+ // Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies)
+ //
+ while (dst < end)
+ {
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ dst += 16;
+ src += 16;
+ }
+}
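
The copy requires non-overlapping buffers, 16-byte alignment on both pointers,
and a byte count that is a nonzero multiple of 16; the asserts above enforce
exactly those preconditions in debug builds. A conforming-call sketch:

const size_t BYTES = 4096;  // multiple of 16
char* src = (char*)ll_aligned_malloc<16>(BYTES);
char* dst = (char*)ll_aligned_malloc<16>(BYTES);
memset(src, 0xAB, BYTES);

ll_memcpy_nonaliased_aligned_16(dst, src, BYTES);  // buffers must not overlap

ll_aligned_free<16>(dst);
ll_aligned_free<16>(src);
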
+
#ifndef __DEBUG_PRIVATE_MEM__
#define __DEBUG_PRIVATE_MEM__ 0
#endif
@@ -148,70 +345,25 @@ public:
static U64 getCurrentRSS();
static U32 getWorkingSetSize();
static void* tryToAlloc(void* address, U32 size);
- static void initMaxHeapSizeGB(F32 max_heap_size_gb, BOOL prevent_heap_failure);
+ static void initMaxHeapSizeGB(F32Gigabytes max_heap_size, BOOL prevent_heap_failure);
static void updateMemoryInfo() ;
static void logMemoryInfo(BOOL update = FALSE);
static bool isMemoryPoolLow();
- static U32 getAvailableMemKB() ;
- static U32 getMaxMemKB() ;
- static U32 getAllocatedMemKB() ;
+ static U32Kilobytes getAvailableMemKB() ;
+ static U32Kilobytes getMaxMemKB() ;
+ static U32Kilobytes getAllocatedMemKB() ;
private:
static char* reserveMem;
- static U32 sAvailPhysicalMemInKB ;
- static U32 sMaxPhysicalMemInKB ;
- static U32 sAllocatedMemInKB;
- static U32 sAllocatedPageSizeInKB ;
+ static U32Kilobytes sAvailPhysicalMemInKB ;
+ static U32Kilobytes sMaxPhysicalMemInKB ;
+ static U32Kilobytes sAllocatedMemInKB;
+ static U32Kilobytes sAllocatedPageSizeInKB ;
- static U32 sMaxHeapSizeInKB;
+ static U32Kilobytes sMaxHeapSizeInKB;
static BOOL sEnableMemoryFailurePrevention;
};
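
The LLMemory API now takes and returns the strongly typed unit wrappers from
llunits.h instead of raw U32 kilobyte counts, so mismatched units fail to
compile rather than silently misbehave. A hedged call-site sketch (values are
illustrative):

LLMemory::initMaxHeapSizeGB(F32Gigabytes(1.6f), TRUE);  // was: (1.6f, TRUE)
U32Kilobytes avail = LLMemory::getAvailableMemKB();     // typed, was raw U32
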
-//----------------------------------------------------------------------------
-#if MEM_TRACK_MEM
-class LLMutex ;
-class LL_COMMON_API LLMemTracker
-{
-private:
- LLMemTracker() ;
- ~LLMemTracker() ;
-
-public:
- static void release() ;
- static LLMemTracker* getInstance() ;
-
- void track(const char* function, const int line) ;
- void preDraw(BOOL pause) ;
- void postDraw() ;
- const char* getNextLine() ;
-
-private:
- static LLMemTracker* sInstance ;
-
- char** mStringBuffer ;
- S32 mCapacity ;
- U32 mLastAllocatedMem ;
- S32 mCurIndex ;
- S32 mCounter;
- S32 mDrawnIndex;
- S32 mNumOfDrawn;
- BOOL mPaused;
- LLMutex* mMutexp ;
-};
-
-#define MEM_TRACK_RELEASE LLMemTracker::release() ;
-#define MEM_TRACK LLMemTracker::getInstance()->track(__FUNCTION__, __LINE__) ;
-
-#else // MEM_TRACK_MEM
-
-#define MEM_TRACK_RELEASE
-#define MEM_TRACK
-
-#endif // MEM_TRACK_MEM
-
-//----------------------------------------------------------------------------
-
-
//
//class LLPrivateMemoryPool defines a private memory pool for an application to use, so the application does not
//need to access the heap directly for each memory allocation. Through this, the allocation speed is faster,
@@ -541,13 +693,7 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr)
// LLSingleton moved to llsingleton.h
-LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
-#ifdef SHOW_ASSERT
-#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
-#else
-#define ll_assert_aligned(ptr,alignment)
-#endif
#endif