summaryrefslogtreecommitdiff
path: root/indra/llcommon/llmemory.h
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llmemory.h')
-rw-r--r--indra/llcommon/llmemory.h836
1 files changed, 418 insertions, 418 deletions
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index ea360881c6..2c3f66fab8 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -1,418 +1,418 @@
-/**
- * @file llmemory.h
- * @brief Memory allocation/deallocation header-stuff goes here.
- *
- * $LicenseInfo:firstyear=2002&license=viewerlgpl$
- * Second Life Viewer Source Code
- * Copyright (C) 2010, Linden Research, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License only.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
- * $/LicenseInfo$
- */
-#ifndef LLMEMORY_H
-#define LLMEMORY_H
-
-#include "linden_common.h"
-#include "llunits.h"
-#include "stdtypes.h"
-#if !LL_WINDOWS
-#include <stdint.h>
-#endif
-
-class LLMutex ;
-
-#if LL_WINDOWS && LL_DEBUG
-#define LL_CHECK_MEMORY llassert(_CrtCheckMemory());
-#else
-#define LL_CHECK_MEMORY
-#endif
-
-
-#if LL_WINDOWS
-#define LL_ALIGN_OF __alignof
-#else
-#define LL_ALIGN_OF __align_of__
-#endif
-
-#if LL_WINDOWS
-#define LL_DEFAULT_HEAP_ALIGN 8
-#elif LL_DARWIN
-#define LL_DEFAULT_HEAP_ALIGN 16
-#elif LL_LINUX
-#define LL_DEFAULT_HEAP_ALIGN 8
-#endif
-
-
-LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
-
-#ifdef SHOW_ASSERT
-// This is incredibly expensive - in profiling Windows RWD builds, 30%
-// of CPU time was in aligment checks.
-//#define ASSERT_ALIGNMENT
-#endif
-
-#ifdef ASSERT_ALIGNMENT
-#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(uintptr_t(ptr),((U32)alignment))
-#else
-#define ll_assert_aligned(ptr,alignment)
-#endif
-
-#include <xmmintrin.h>
-
-template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address)
-{
- return reinterpret_cast<T*>(
- (uintptr_t(address) + 0xF) & ~0xF);
-}
-
-template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)
-{
- return reinterpret_cast<T*>(
- (uintptr_t(address) + 0x3F) & ~0x3F);
-}
-
-#if LL_LINUX || LL_DARWIN
-
-#define LL_ALIGN_PREFIX(x)
-#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x)))
-
-#elif LL_WINDOWS
-
-#define LL_ALIGN_PREFIX(x) __declspec(align(x))
-#define LL_ALIGN_POSTFIX(x)
-
-#else
-#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined"
-#endif
-
-#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)
-
-#define LL_ALIGN_NEW \
-public: \
- void* operator new(size_t size) \
- { \
- return ll_aligned_malloc_16(size); \
- } \
- \
- void operator delete(void* ptr) \
- { \
- ll_aligned_free_16(ptr); \
- } \
- \
- void* operator new[](size_t size) \
- { \
- return ll_aligned_malloc_16(size); \
- } \
- \
- void operator delete[](void* ptr) \
- { \
- ll_aligned_free_16(ptr); \
- }
-
-
-//------------------------------------------------------------------------------------------------
-//------------------------------------------------------------------------------------------------
- // for enable buffer overrun detection predefine LL_DEBUG_BUFFER_OVERRUN in current library
- // change preprocessor code to: #if 1 && defined(LL_WINDOWS)
-
-#if 0 && defined(LL_WINDOWS)
- void* ll_aligned_malloc_fallback( size_t size, int align );
- void ll_aligned_free_fallback( void* ptr );
-//------------------------------------------------------------------------------------------------
-#else
- inline void* ll_aligned_malloc_fallback( size_t size, int align )
- {
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- #if defined(LL_WINDOWS)
- void* ret = _aligned_malloc(size, align);
- #else
- char* aligned = NULL;
- void* mem = malloc( size + (align - 1) + sizeof(void*) );
- if (mem)
- {
- aligned = ((char*)mem) + sizeof(void*);
- aligned += align - ((uintptr_t)aligned & (align - 1));
-
- ((void**)aligned)[-1] = mem;
- }
- void* ret = aligned;
- #endif
- LL_PROFILE_ALLOC(ret, size);
- return ret;
- }
-
- inline void ll_aligned_free_fallback( void* ptr )
- {
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- LL_PROFILE_FREE(ptr);
- #if defined(LL_WINDOWS)
- _aligned_free(ptr);
- #else
- if (ptr)
- {
- free( ((void**)ptr)[-1] );
- }
- #endif
- }
-#endif
-//------------------------------------------------------------------------------------------------
-//------------------------------------------------------------------------------------------------
-
-inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
-#if defined(LL_WINDOWS)
- void* ret = _aligned_malloc(size, 16);
-#elif defined(LL_DARWIN)
- void* ret = malloc(size); // default osx malloc is 16 byte aligned.
-#else
- void *ret;
- if (0 != posix_memalign(&ret, 16, size))
- return nullptr;
-#endif
- LL_PROFILE_ALLOC(ret, size);
- return ret;
-}
-
-inline void ll_aligned_free_16(void *p)
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- LL_PROFILE_FREE(p);
-#if defined(LL_WINDOWS)
- _aligned_free(p);
-#elif defined(LL_DARWIN)
- return free(p);
-#else
- free(p); // posix_memalign() is compatible with heap deallocator
-#endif
-}
-
-inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16().
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- LL_PROFILE_FREE(ptr);
-#if defined(LL_WINDOWS)
- void* ret = _aligned_realloc(ptr, size, 16);
-#elif defined(LL_DARWIN)
- void* ret = realloc(ptr,size); // default osx malloc is 16 byte aligned.
-#else
- //FIXME: memcpy is SLOW
- void* ret = ll_aligned_malloc_16(size);
- if (ptr)
- {
- if (ret)
- {
- // Only copy the size of the smallest memory block to avoid memory corruption.
- memcpy(ret, ptr, llmin(old_size, size));
- }
- ll_aligned_free_16(ptr);
- }
-#endif
- LL_PROFILE_ALLOC(ptr, size);
- return ret;
-}
-
-inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
-#if defined(LL_WINDOWS)
- void* ret = _aligned_malloc(size, 32);
-#elif defined(LL_DARWIN)
- void* ret = ll_aligned_malloc_fallback( size, 32 );
-#else
- void *ret;
- if (0 != posix_memalign(&ret, 32, size))
- return nullptr;
-#endif
- LL_PROFILE_ALLOC(ret, size);
- return ret;
-}
-
-inline void ll_aligned_free_32(void *p)
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- LL_PROFILE_FREE(p);
-#if defined(LL_WINDOWS)
- _aligned_free(p);
-#elif defined(LL_DARWIN)
- ll_aligned_free_fallback( p );
-#else
- free(p); // posix_memalign() is compatible with heap deallocator
-#endif
-}
-
-// general purpose dispatch functions that are forced inline so they can compile down to a single call
-template<size_t ALIGNMENT>
-LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- void* ret;
- if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)
- {
- ret = malloc(size);
- LL_PROFILE_ALLOC(ret, size);
- }
- else if (ALIGNMENT == 16)
- {
- ret = ll_aligned_malloc_16(size);
- }
- else if (ALIGNMENT == 32)
- {
- ret = ll_aligned_malloc_32(size);
- }
- else
- {
- ret = ll_aligned_malloc_fallback(size, ALIGNMENT);
- }
- return ret;
-}
-
-template<size_t ALIGNMENT>
-LL_FORCE_INLINE void ll_aligned_free(void* ptr)
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)
- {
- LL_PROFILE_FREE(ptr);
- free(ptr);
- }
- else if (ALIGNMENT == 16)
- {
- ll_aligned_free_16(ptr);
- }
- else if (ALIGNMENT == 32)
- {
- return ll_aligned_free_32(ptr);
- }
- else
- {
- return ll_aligned_free_fallback(ptr);
- }
-}
-
-// Copy words 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP.
-// Source and dest must be 16-byte aligned and size must be multiple of 16.
-//
-inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes)
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- assert(src != NULL);
- assert(dst != NULL);
- assert(bytes > 0);
- assert((bytes % sizeof(F32))== 0);
- ll_assert_aligned(src,16);
- ll_assert_aligned(dst,16);
-
- assert((src < dst) ? ((src + bytes) <= dst) : ((dst + bytes) <= src));
- assert(bytes%16==0);
-
- char* end = dst + bytes;
-
- if (bytes > 64)
- {
-
- // Find start of 64b aligned area within block
- //
- void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
-
- //at least 64 bytes before the end of the destination, switch to 16 byte copies
- void* end_64 = end-64;
-
- // Prefetch the head of the 64b area now
- //
- _mm_prefetch((char*)begin_64, _MM_HINT_NTA);
- _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
- _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
- _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
-
- // Copy 16b chunks until we're 64b aligned
- //
- while (dst < begin_64)
- {
-
- _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
- dst += 16;
- src += 16;
- }
-
- // Copy 64b chunks up to your tail
- //
- // might be good to shmoo the 512b prefetch offset
- // (characterize performance for various values)
- //
- while (dst < end_64)
- {
- _mm_prefetch((char*)src + 512, _MM_HINT_NTA);
- _mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
- _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
- _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16)));
- _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32)));
- _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48)));
- dst += 64;
- src += 64;
- }
- }
-
- // Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies)
- //
- while (dst < end)
- {
- _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
- dst += 16;
- src += 16;
- }
-}
-
-#ifndef __DEBUG_PRIVATE_MEM__
-#define __DEBUG_PRIVATE_MEM__ 0
-#endif
-
-class LL_COMMON_API LLMemory
-{
-public:
- // Return the resident set size of the current process, in bytes.
- // Return value is zero if not known.
- static U64 getCurrentRSS();
- static void* tryToAlloc(void* address, U32 size);
- static void initMaxHeapSizeGB(F32Gigabytes max_heap_size);
- static void updateMemoryInfo() ;
- static void logMemoryInfo(bool update = false);
-
- static U32Kilobytes getAvailableMemKB() ;
- static U32Kilobytes getMaxMemKB() ;
- static U32Kilobytes getAllocatedMemKB() ;
-private:
- static U32Kilobytes sAvailPhysicalMemInKB ;
- static U32Kilobytes sMaxPhysicalMemInKB ;
- static U32Kilobytes sAllocatedMemInKB;
- static U32Kilobytes sAllocatedPageSizeInKB ;
-
- static U32Kilobytes sMaxHeapSizeInKB;
-};
-
-// LLRefCount moved to llrefcount.h
-
-// LLPointer moved to llpointer.h
-
-// LLSafeHandle moved to llsafehandle.h
-
-// LLSingleton moved to llsingleton.h
-
-
-
-
-#endif
+/**
+ * @file llmemory.h
+ * @brief Memory allocation/deallocation header-stuff goes here.
+ *
+ * $LicenseInfo:firstyear=2002&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+#ifndef LLMEMORY_H
+#define LLMEMORY_H
+
+#include "linden_common.h"
+#include "llunits.h"
+#include "stdtypes.h"
+#if !LL_WINDOWS
+#include <stdint.h>
+#endif
+
+class LLMutex ;
+
+#if LL_WINDOWS && LL_DEBUG
+#define LL_CHECK_MEMORY llassert(_CrtCheckMemory());
+#else
+#define LL_CHECK_MEMORY
+#endif
+
+
+#if LL_WINDOWS
+#define LL_ALIGN_OF __alignof
+#else
+#define LL_ALIGN_OF __align_of__
+#endif
+
+#if LL_WINDOWS
+#define LL_DEFAULT_HEAP_ALIGN 8
+#elif LL_DARWIN
+#define LL_DEFAULT_HEAP_ALIGN 16
+#elif LL_LINUX
+#define LL_DEFAULT_HEAP_ALIGN 8
+#endif
+
+
+LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
+
+#ifdef SHOW_ASSERT
+// This is incredibly expensive - in profiling Windows RWD builds, 30%
+// of CPU time was in aligment checks.
+//#define ASSERT_ALIGNMENT
+#endif
+
+#ifdef ASSERT_ALIGNMENT
+#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(uintptr_t(ptr),((U32)alignment))
+#else
+#define ll_assert_aligned(ptr,alignment)
+#endif
+
+#include <xmmintrin.h>
+
+template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address)
+{
+ return reinterpret_cast<T*>(
+ (uintptr_t(address) + 0xF) & ~0xF);
+}
+
+template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)
+{
+ return reinterpret_cast<T*>(
+ (uintptr_t(address) + 0x3F) & ~0x3F);
+}
+
+#if LL_LINUX || LL_DARWIN
+
+#define LL_ALIGN_PREFIX(x)
+#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x)))
+
+#elif LL_WINDOWS
+
+#define LL_ALIGN_PREFIX(x) __declspec(align(x))
+#define LL_ALIGN_POSTFIX(x)
+
+#else
+#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined"
+#endif
+
+#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)
+
+#define LL_ALIGN_NEW \
+public: \
+ void* operator new(size_t size) \
+ { \
+ return ll_aligned_malloc_16(size); \
+ } \
+ \
+ void operator delete(void* ptr) \
+ { \
+ ll_aligned_free_16(ptr); \
+ } \
+ \
+ void* operator new[](size_t size) \
+ { \
+ return ll_aligned_malloc_16(size); \
+ } \
+ \
+ void operator delete[](void* ptr) \
+ { \
+ ll_aligned_free_16(ptr); \
+ }
+
+
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
+ // for enable buffer overrun detection predefine LL_DEBUG_BUFFER_OVERRUN in current library
+ // change preprocessor code to: #if 1 && defined(LL_WINDOWS)
+
+#if 0 && defined(LL_WINDOWS)
+ void* ll_aligned_malloc_fallback( size_t size, int align );
+ void ll_aligned_free_fallback( void* ptr );
+//------------------------------------------------------------------------------------------------
+#else
+ inline void* ll_aligned_malloc_fallback( size_t size, int align )
+ {
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ #if defined(LL_WINDOWS)
+ void* ret = _aligned_malloc(size, align);
+ #else
+ char* aligned = NULL;
+ void* mem = malloc( size + (align - 1) + sizeof(void*) );
+ if (mem)
+ {
+ aligned = ((char*)mem) + sizeof(void*);
+ aligned += align - ((uintptr_t)aligned & (align - 1));
+
+ ((void**)aligned)[-1] = mem;
+ }
+ void* ret = aligned;
+ #endif
+ LL_PROFILE_ALLOC(ret, size);
+ return ret;
+ }
+
+ inline void ll_aligned_free_fallback( void* ptr )
+ {
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ LL_PROFILE_FREE(ptr);
+ #if defined(LL_WINDOWS)
+ _aligned_free(ptr);
+ #else
+ if (ptr)
+ {
+ free( ((void**)ptr)[-1] );
+ }
+ #endif
+ }
+#endif
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
+
+inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+#if defined(LL_WINDOWS)
+ void* ret = _aligned_malloc(size, 16);
+#elif defined(LL_DARWIN)
+ void* ret = malloc(size); // default osx malloc is 16 byte aligned.
+#else
+ void *ret;
+ if (0 != posix_memalign(&ret, 16, size))
+ return nullptr;
+#endif
+ LL_PROFILE_ALLOC(ret, size);
+ return ret;
+}
+
+inline void ll_aligned_free_16(void *p)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ LL_PROFILE_FREE(p);
+#if defined(LL_WINDOWS)
+ _aligned_free(p);
+#elif defined(LL_DARWIN)
+ return free(p);
+#else
+ free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
+
+inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16().
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ LL_PROFILE_FREE(ptr);
+#if defined(LL_WINDOWS)
+ void* ret = _aligned_realloc(ptr, size, 16);
+#elif defined(LL_DARWIN)
+ void* ret = realloc(ptr,size); // default osx malloc is 16 byte aligned.
+#else
+ //FIXME: memcpy is SLOW
+ void* ret = ll_aligned_malloc_16(size);
+ if (ptr)
+ {
+ if (ret)
+ {
+ // Only copy the size of the smallest memory block to avoid memory corruption.
+ memcpy(ret, ptr, llmin(old_size, size));
+ }
+ ll_aligned_free_16(ptr);
+ }
+#endif
+ LL_PROFILE_ALLOC(ptr, size);
+ return ret;
+}
+
+inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+#if defined(LL_WINDOWS)
+ void* ret = _aligned_malloc(size, 32);
+#elif defined(LL_DARWIN)
+ void* ret = ll_aligned_malloc_fallback( size, 32 );
+#else
+ void *ret;
+ if (0 != posix_memalign(&ret, 32, size))
+ return nullptr;
+#endif
+ LL_PROFILE_ALLOC(ret, size);
+ return ret;
+}
+
+inline void ll_aligned_free_32(void *p)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ LL_PROFILE_FREE(p);
+#if defined(LL_WINDOWS)
+ _aligned_free(p);
+#elif defined(LL_DARWIN)
+ ll_aligned_free_fallback( p );
+#else
+ free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
+
+// general purpose dispatch functions that are forced inline so they can compile down to a single call
+template<size_t ALIGNMENT>
+LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ void* ret;
+ if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)
+ {
+ ret = malloc(size);
+ LL_PROFILE_ALLOC(ret, size);
+ }
+ else if (ALIGNMENT == 16)
+ {
+ ret = ll_aligned_malloc_16(size);
+ }
+ else if (ALIGNMENT == 32)
+ {
+ ret = ll_aligned_malloc_32(size);
+ }
+ else
+ {
+ ret = ll_aligned_malloc_fallback(size, ALIGNMENT);
+ }
+ return ret;
+}
+
+template<size_t ALIGNMENT>
+LL_FORCE_INLINE void ll_aligned_free(void* ptr)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)
+ {
+ LL_PROFILE_FREE(ptr);
+ free(ptr);
+ }
+ else if (ALIGNMENT == 16)
+ {
+ ll_aligned_free_16(ptr);
+ }
+ else if (ALIGNMENT == 32)
+ {
+ return ll_aligned_free_32(ptr);
+ }
+ else
+ {
+ return ll_aligned_free_fallback(ptr);
+ }
+}
+
+// Copy words 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP.
+// Source and dest must be 16-byte aligned and size must be multiple of 16.
+//
+inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ assert(src != NULL);
+ assert(dst != NULL);
+ assert(bytes > 0);
+ assert((bytes % sizeof(F32))== 0);
+ ll_assert_aligned(src,16);
+ ll_assert_aligned(dst,16);
+
+ assert((src < dst) ? ((src + bytes) <= dst) : ((dst + bytes) <= src));
+ assert(bytes%16==0);
+
+ char* end = dst + bytes;
+
+ if (bytes > 64)
+ {
+
+ // Find start of 64b aligned area within block
+ //
+ void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
+
+ //at least 64 bytes before the end of the destination, switch to 16 byte copies
+ void* end_64 = end-64;
+
+ // Prefetch the head of the 64b area now
+ //
+ _mm_prefetch((char*)begin_64, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
+
+ // Copy 16b chunks until we're 64b aligned
+ //
+ while (dst < begin_64)
+ {
+
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ dst += 16;
+ src += 16;
+ }
+
+ // Copy 64b chunks up to your tail
+ //
+ // might be good to shmoo the 512b prefetch offset
+ // (characterize performance for various values)
+ //
+ while (dst < end_64)
+ {
+ _mm_prefetch((char*)src + 512, _MM_HINT_NTA);
+ _mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16)));
+ _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32)));
+ _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48)));
+ dst += 64;
+ src += 64;
+ }
+ }
+
+ // Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies)
+ //
+ while (dst < end)
+ {
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ dst += 16;
+ src += 16;
+ }
+}
+
+#ifndef __DEBUG_PRIVATE_MEM__
+#define __DEBUG_PRIVATE_MEM__ 0
+#endif
+
+class LL_COMMON_API LLMemory
+{
+public:
+ // Return the resident set size of the current process, in bytes.
+ // Return value is zero if not known.
+ static U64 getCurrentRSS();
+ static void* tryToAlloc(void* address, U32 size);
+ static void initMaxHeapSizeGB(F32Gigabytes max_heap_size);
+ static void updateMemoryInfo() ;
+ static void logMemoryInfo(bool update = false);
+
+ static U32Kilobytes getAvailableMemKB() ;
+ static U32Kilobytes getMaxMemKB() ;
+ static U32Kilobytes getAllocatedMemKB() ;
+private:
+ static U32Kilobytes sAvailPhysicalMemInKB ;
+ static U32Kilobytes sMaxPhysicalMemInKB ;
+ static U32Kilobytes sAllocatedMemInKB;
+ static U32Kilobytes sAllocatedPageSizeInKB ;
+
+ static U32Kilobytes sMaxHeapSizeInKB;
+};
+
+// LLRefCount moved to llrefcount.h
+
+// LLPointer moved to llpointer.h
+
+// LLSafeHandle moved to llsafehandle.h
+
+// LLSingleton moved to llsingleton.h
+
+
+
+
+#endif