Diffstat (limited to 'indra/llcommon')
-rw-r--r--  indra/llcommon/CMakeLists.txt   |   1
-rw-r--r--  indra/llcommon/llalignedarray.h | 139
-rw-r--r--  indra/llcommon/llmemory.h       | 116
3 files changed, 250 insertions(+), 6 deletions(-)
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt
index e019c17280..0c2ceebb52 100644
--- a/indra/llcommon/CMakeLists.txt
+++ b/indra/llcommon/CMakeLists.txt
@@ -121,6 +121,7 @@ set(llcommon_HEADER_FILES
linden_common.h
linked_lists.h
llaccountingcost.h
+ llalignedarray.h
llallocator.h
llallocator_heap_profile.h
llagentconstants.h
diff --git a/indra/llcommon/llalignedarray.h b/indra/llcommon/llalignedarray.h
new file mode 100644
index 0000000000..ed8fd31205
--- /dev/null
+++ b/indra/llcommon/llalignedarray.h
@@ -0,0 +1,139 @@
+/**
+ * @file llalignedarray.h
+ * @brief A dynamically resizable array which obeys alignment restrictions and mimics the std::vector accessors.
+ *
+ * $LicenseInfo:firstyear=2002&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_LLALIGNEDARRAY_H
+#define LL_LLALIGNEDARRAY_H
+
+#include "llmemory.h"
+
+// NOTE: T is assumed to be trivially copyable, and in practice sizeof(T)
+// should be a multiple of 16: reallocation copies raw bytes with
+// ll_memcpy_nonaliased_aligned_16 and never runs constructors or destructors.
+template <class T, U32 alignment>
+class LLAlignedArray
+{
+public:
+ T* mArray;
+ U32 mElementCount;
+ U32 mCapacity;
+
+ LLAlignedArray();
+ ~LLAlignedArray();
+
+ void push_back(const T& elem);
+ U32 size() const { return mElementCount; }
+ void resize(U32 size);
+ T* append(S32 N);
+ T& operator[](int idx);
+ const T& operator[](int idx) const;
+};
+
+template <class T, U32 alignment>
+LLAlignedArray<T, alignment>::LLAlignedArray()
+{
+ llassert(alignment >= 16);
+ mArray = NULL;
+ mElementCount = 0;
+ mCapacity = 0;
+}
+
+template <class T, U32 alignment>
+LLAlignedArray<T, alignment>::~LLAlignedArray()
+{
+ ll_aligned_free(mArray);
+ mArray = NULL;
+ mElementCount = 0;
+ mCapacity = 0;
+}
+
+template <class T, U32 alignment>
+void LLAlignedArray<T, alignment>::push_back(const T& elem)
+{
+ T* old_buf = NULL;
+ if (mCapacity <= mElementCount)
+ {
+ //out of room; grow capacity to roughly twice the element count
+ mCapacity++;
+ mCapacity *= 2;
+ T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment);
+ if (mArray && mElementCount) //skip zero-byte copies; the copy helper asserts bytes > 0
+ {
+ ll_memcpy_nonaliased_aligned_16((char*)new_buf, (char*)mArray, sizeof(T)*mElementCount);
+ }
+ old_buf = mArray;
+ mArray = new_buf;
+ }
+
+ mArray[mElementCount++] = elem;
+
+ //delete old array here to prevent error on a.push_back(a[0])
+ ll_aligned_free(old_buf);
+}
+
+template <class T, U32 alignment>
+void LLAlignedArray<T, alignment>::resize(U32 size)
+{
+ if (mCapacity < size)
+ { //grow geometrically to amortize the cost of reallocation
+ mCapacity = size+mCapacity*2;
+ T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL;
+ if (mArray)
+ {
+ if (mElementCount)
+ { //skip zero-byte copies; the copy helper asserts bytes > 0
+ ll_memcpy_nonaliased_aligned_16((char*) new_buf, (char*) mArray, sizeof(T)*mElementCount);
+ }
+ ll_aligned_free(mArray);
+ }
+
+ //note: newly exposed elements are left uninitialized; no constructors are run
+ mArray = new_buf;
+ }
+
+ mElementCount = size;
+}
+
+
+template <class T, U32 alignment>
+T& LLAlignedArray<T, alignment>::operator[](int idx)
+{
+ llassert(idx >= 0 && (U32)idx < mElementCount);
+ return mArray[idx];
+}
+
+template <class T, U32 alignment>
+const T& LLAlignedArray<T, alignment>::operator[](int idx) const
+{
+ llassert(idx >= 0 && (U32)idx < mElementCount);
+ return mArray[idx];
+}
+
+template <class T, U32 alignment>
+T* LLAlignedArray<T, alignment>::append(S32 N)
+{ //grow by N elements and return a pointer to the first new (uninitialized) slot
+ U32 sz = size();
+ resize(sz+N);
+ return &((*this)[sz]);
+}
+
+#endif
+
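A minimal usage sketch of the new container (illustrative, not part of this change), assuming LLVector4a from llmath, with its usual splat()/clear() helpers, as the 16-byte element type:

#include "llalignedarray.h"
#include "llvector4a.h"

void aligned_array_example()
{
    LLAlignedArray<LLVector4a, 16> verts;

    LLVector4a v;
    v.splat(1.f);
    verts.push_back(v);                  // grows capacity as needed

    LLVector4a* tail = verts.append(4);  // reserve 4 more slots (uninitialized)
    for (U32 i = 0; i < 4; ++i)
    {
        tail[i].clear();
    }

    llassert(verts.size() == 5);
    ll_assert_aligned(&verts[0], 16);    // storage comes from ll_aligned_malloc
}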
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index 46cabfadcd..61e30f11cc 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -36,6 +36,44 @@ class LLMutex ;
#define LL_CHECK_MEMORY
#endif
+LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
+
+#ifdef SHOW_ASSERT
+#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
+#else
+#define ll_assert_aligned(ptr,alignment)
+#endif
+
+#include <xmmintrin.h>
+
+template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address)
+{
+ return reinterpret_cast<T*>(
+ (reinterpret_cast<uintptr_t>(address) + 0xF) & ~0xF);
+}
+
+template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)
+{
+ return reinterpret_cast<T*>(
+ (reinterpret_cast<uintptr_t>(address) + 0x3F) & ~0x3F);
+}
+
+#if LL_LINUX || LL_DARWIN
+
+#define LL_ALIGN_PREFIX(x)
+#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x)))
+
+#elif LL_WINDOWS
+
+#define LL_ALIGN_PREFIX(x) __declspec(align(x))
+#define LL_ALIGN_POSTFIX(x)
+
+#else
+#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined"
+#endif
+
+#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)
+
inline void* ll_aligned_malloc( size_t size, int align )
{
#if defined(LL_WINDOWS)
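As a quick illustration of the macros above (a sketch, not part of this change): LL_ALIGN_16 wraps a declaration in whichever half of the alignment syntax the compiler expects, and LL_NEXT_ALIGNED_ADDRESS rounds a pointer up to the next 16-byte boundary. The Particle struct below is hypothetical:

struct Particle
{
    LL_ALIGN_16(F32 mPosition[4]); // MSVC: __declspec(align(16)); GCC/Clang: __attribute__((aligned(16)))
};

void alignment_example(char* raw)
{
    // (addr + 0xF) & ~0xF rounds up, e.g. 0x1001 -> 0x1010
    F32* aligned = LL_NEXT_ALIGNED_ADDRESS(reinterpret_cast<F32*>(raw));
    ll_assert_aligned(aligned, 16); // expands to nothing unless SHOW_ASSERT is defined
}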
@@ -144,6 +182,78 @@ inline void ll_aligned_free_32(void *p)
#endif
}
+
+// Copy memory in 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP.
+// Both pointers must be 16-byte aligned, and size must be a multiple of 16.
+//
+inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes)
+{
+ assert(src != NULL);
+ assert(dst != NULL);
+ assert(bytes > 0);
+ assert((bytes % 16) == 0); // whole 16-byte blocks only
+ ll_assert_aligned(src,16);
+ ll_assert_aligned(dst,16);
+ // adjacent buffers are allowed; only true overlap is not
+ assert((src < dst) ? ((src + bytes) <= dst) : ((dst + bytes) <= src));
+
+ char* end = dst + bytes;
+
+ if (bytes > 64)
+ {
+
+ // Find start of 64b aligned area within block
+ //
+ void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
+
+ // Stop the 64-byte loop once fewer than 64 bytes remain; the 16-byte tail loop finishes up
+ void* end_64 = end-64;
+
+ // Prefetch the head of the 64b area now
+ //
+ _mm_prefetch((char*)begin_64, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
+ _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
+
+ // Copy 16b chunks until we're 64b aligned
+ //
+ while (dst < begin_64)
+ {
+
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ dst += 16;
+ src += 16;
+ }
+
+ // Copy 64-byte chunks up to the tail boundary
+ //
+ // TODO: it may be worth sweeping the 512-byte prefetch offset
+ // (characterize performance for various values)
+ //
+ //
+ while (dst < end_64)
+ {
+ _mm_prefetch((char*)src + 512, _MM_HINT_NTA);
+ _mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16)));
+ _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32)));
+ _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48)));
+ dst += 64;
+ src += 64;
+ }
+ }
+
+ // Copy any remaining 16-byte tail chunks (or ALL of the chunks, for copies under 64 bytes)
+ //
+ while (dst < end)
+ {
+ _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
+ dst += 16;
+ src += 16;
+ }
+}
+
#ifndef __DEBUG_PRIVATE_MEM__
#define __DEBUG_PRIVATE_MEM__ 0
#endif
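A usage sketch for the copy routine (illustrative only), pairing it with the existing ll_aligned_malloc_16/ll_aligned_free_16 helpers so that every precondition asserted above holds: 16-byte aligned pointers, non-overlapping buffers, and a byte count that is a multiple of 16:

#include <string.h>

void memcpy_example()
{
    const size_t bytes = 256; // multiple of 16, as required
    char* src = (char*)ll_aligned_malloc_16(bytes);
    char* dst = (char*)ll_aligned_malloc_16(bytes);

    memset(src, 0xAB, bytes);
    ll_memcpy_nonaliased_aligned_16(dst, src, bytes);

    ll_aligned_free_16(dst);
    ll_aligned_free_16(src);
}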
@@ -552,13 +662,7 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr)
// LLSingleton moved to llsingleton.h
-LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
-#ifdef SHOW_ASSERT
-#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
-#else
-#define ll_assert_aligned(ptr,alignment)
-#endif
#endif