summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--indra/cmake/Copy3rdPartyLibs.cmake9
-rwxr-xr-x[-rw-r--r--]indra/cmake/FindGooglePerfTools.cmake0
-rwxr-xr-x[-rw-r--r--]indra/cmake/GooglePerfTools.cmake40
-rwxr-xr-x[-rw-r--r--]indra/cmake/LLAddBuildTest.cmake9
-rwxr-xr-x[-rw-r--r--]indra/llcommon/llallocator.cpp2
-rwxr-xr-x[-rw-r--r--]indra/llcommon/llmemory.cpp12
-rwxr-xr-x[-rw-r--r--]indra/llcommon/llmemory.h45
-rwxr-xr-x[-rw-r--r--]indra/llmath/CMakeLists.txt1
-rwxr-xr-x[-rw-r--r--]indra/llmath/llcamera.h12
-rwxr-xr-x[-rw-r--r--]indra/llmath/llmatrix3a.cpp0
-rw-r--r--indra/llmath/llmatrix3a.h2
-rwxr-xr-x[-rw-r--r--]indra/llmath/llmatrix4a.h2
-rw-r--r--indra/llmath/lloctree.h4
-rwxr-xr-x[-rw-r--r--]indra/llmath/llplane.h4
-rwxr-xr-x[-rw-r--r--]indra/llmath/llsimdmath.h3
-rw-r--r--indra/llmath/llsimdtypes.inl2
-rwxr-xr-x[-rw-r--r--]indra/llmath/llvector4a.cpp8
-rwxr-xr-x[-rw-r--r--]indra/llmath/llvector4a.h6
-rw-r--r--indra/llmath/llvector4a.inl1
-rw-r--r--indra/llmath/llvector4logical.h2
-rwxr-xr-x[-rw-r--r--]indra/llmath/llvolume.cpp45
-rwxr-xr-x[-rw-r--r--]indra/llmath/llvolumeoctree.h26
-rwxr-xr-xindra/llmath/tests/alignment_test.cpp120
-rw-r--r--indra/llprimitive/llmodel.cpp3
-rwxr-xr-x[-rw-r--r--]indra/newview/CMakeLists.txt8
-rw-r--r--indra/newview/llappviewerwin32.cpp2
-rwxr-xr-x[-rw-r--r--]indra/newview/lldrawable.h17
-rwxr-xr-x[-rw-r--r--]indra/newview/lldynamictexture.h12
-rw-r--r--indra/newview/llface.cpp3
-rwxr-xr-x[-rw-r--r--]indra/newview/llface.h11
-rwxr-xr-xindra/newview/llfloatermodelpreview.cpp3
-rwxr-xr-x[-rw-r--r--]indra/newview/llpolymesh.cpp24
-rwxr-xr-x[-rw-r--r--]indra/newview/llspatialpartition.cpp3
-rwxr-xr-x[-rw-r--r--]indra/newview/llspatialpartition.h36
-rwxr-xr-x[-rw-r--r--]indra/newview/llviewercamera.h13
-rwxr-xr-x[-rw-r--r--]indra/newview/llviewerjointmesh.cpp6
-rwxr-xr-x[-rw-r--r--]indra/newview/llvoavatar.cpp2
-rwxr-xr-x[-rw-r--r--]indra/newview/llvoavatar.h12
-rwxr-xr-x[-rw-r--r--]indra/newview/llvoavatarself.h10
-rwxr-xr-x[-rw-r--r--]indra/newview/viewer_manifest.py12
40 files changed, 419 insertions, 113 deletions
diff --git a/indra/cmake/Copy3rdPartyLibs.cmake b/indra/cmake/Copy3rdPartyLibs.cmake
index 394db362b1..966300f1c7 100644
--- a/indra/cmake/Copy3rdPartyLibs.cmake
+++ b/indra/cmake/Copy3rdPartyLibs.cmake
@@ -55,10 +55,10 @@ if(WINDOWS)
glod.dll
)
- if(USE_GOOGLE_PERFTOOLS)
+ if(USE_TCMALLOC)
set(debug_files ${debug_files} libtcmalloc_minimal-debug.dll)
set(release_files ${release_files} libtcmalloc_minimal.dll)
- endif(USE_GOOGLE_PERFTOOLS)
+ endif(USE_TCMALLOC)
if (FMOD)
set(debug_files ${debug_files} fmod.dll)
@@ -268,13 +268,16 @@ elseif(LINUX)
libopenal.so
libopenjpeg.so
libssl.so
- libtcmalloc_minimal.so
libuuid.so.16
libuuid.so.16.0.22
libssl.so.1.0.0
libfontconfig.so.1.4.4
)
+ if (USE_TCMALLOC)
+ set(release_files ${release_files} "libtcmalloc_minimal.so")
+ endif (USE_TCMALLOC)
+
if (FMOD)
set(release_files ${release_files} "libfmod-3.75.so")
endif (FMOD)
diff --git a/indra/cmake/FindGooglePerfTools.cmake b/indra/cmake/FindGooglePerfTools.cmake
index bb125d538e..bb125d538e 100644..100755
--- a/indra/cmake/FindGooglePerfTools.cmake
+++ b/indra/cmake/FindGooglePerfTools.cmake
diff --git a/indra/cmake/GooglePerfTools.cmake b/indra/cmake/GooglePerfTools.cmake
index d9f91193be..73b3642ae6 100644..100755
--- a/indra/cmake/GooglePerfTools.cmake
+++ b/indra/cmake/GooglePerfTools.cmake
@@ -1,20 +1,34 @@
# -*- cmake -*-
include(Prebuilt)
+# If you want to enable or disable TCMALLOC in viewer builds, this is the place.
+# set ON or OFF as desired.
+set (USE_TCMALLOC OFF)
+
if (STANDALONE)
include(FindGooglePerfTools)
else (STANDALONE)
if (WINDOWS)
- use_prebuilt_binary(tcmalloc)
- set(TCMALLOC_LIBRARIES
- debug libtcmalloc_minimal-debug
- optimized libtcmalloc_minimal)
+ if (USE_TCMALLOC)
+ use_prebuilt_binary(tcmalloc)
+ set(TCMALLOC_LIBRARIES
+ debug libtcmalloc_minimal-debug
+ optimized libtcmalloc_minimal)
+ set(TCMALLOC_LINK_FLAGS "/INCLUDE:__tcmalloc")
+ else (USE_TCMALLOC)
+ set(TCMALLOC_LIBRARIES)
+ set(TCMALLOC_LINK_FLAGS)
+ endif (USE_TCMALLOC)
set(GOOGLE_PERFTOOLS_FOUND "YES")
endif (WINDOWS)
if (LINUX)
- use_prebuilt_binary(tcmalloc)
- set(TCMALLOC_LIBRARIES
- tcmalloc)
+ if (USE_TCMALLOC)
+ use_prebuilt_binary(tcmalloc)
+ set(TCMALLOC_LIBRARIES
+ tcmalloc)
+ else (USE_TCMALLOC)
+ set(TCMALLOC_LIBRARIES)
+ endif (USE_TCMALLOC)
set(PROFILER_LIBRARIES profiler)
set(GOOGLE_PERFTOOLS_INCLUDE_DIR
${LIBS_PREBUILT_DIR}/include)
@@ -29,13 +43,19 @@ if (GOOGLE_PERFTOOLS_FOUND)
endif (GOOGLE_PERFTOOLS_FOUND)
if (WINDOWS)
- set(USE_GOOGLE_PERFTOOLS ON)
+ set(USE_GOOGLE_PERFTOOLS ON)
endif (WINDOWS)
if (USE_GOOGLE_PERFTOOLS)
- set(TCMALLOC_FLAG -ULL_USE_TCMALLOC=1)
+ if (USE_TCMALLOC)
+ set(TCMALLOC_FLAG -DLL_USE_TCMALLOC=1)
+ else (USE_TCMALLOC)
+ set(TCMALLOC_FLAG -ULL_USE_TCMALLOC)
+ endif (USE_TCMALLOC)
+endif (USE_GOOGLE_PERFTOOLS)
+
+if (USE_GOOGLE_PERFTOOLS)
include_directories(${GOOGLE_PERFTOOLS_INCLUDE_DIR})
set(GOOGLE_PERFTOOLS_LIBRARIES ${TCMALLOC_LIBRARIES} ${STACKTRACE_LIBRARIES} ${PROFILER_LIBRARIES})
else (USE_GOOGLE_PERFTOOLS)
- set(TCMALLOC_FLAG -ULL_USE_TCMALLOC)
endif (USE_GOOGLE_PERFTOOLS)
diff --git a/indra/cmake/LLAddBuildTest.cmake b/indra/cmake/LLAddBuildTest.cmake
index 08feab6e36..a6f69a09e9 100644..100755
--- a/indra/cmake/LLAddBuildTest.cmake
+++ b/indra/cmake/LLAddBuildTest.cmake
@@ -205,6 +205,15 @@ FUNCTION(LL_ADD_INTEGRATION_TEST
SET_TARGET_PROPERTIES(INTEGRATION_TEST_${testname} PROPERTIES COMPILE_FLAGS -I"${TUT_INCLUDE_DIR}")
endif(STANDALONE)
+ if (WINDOWS)
+ SET_TARGET_PROPERTIES(INTEGRATION_TEST_${testname}
+ PROPERTIES
+ LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS ${TCMALLOC_LINK_FLAGS}"
+ LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO"
+ LINK_FLAGS_RELEASE ""
+ )
+ endif (WINDOWS)
+
# Add link deps to the executable
if(TEST_DEBUG)
message(STATUS "TARGET_LINK_LIBRARIES(INTEGRATION_TEST_${testname} ${libraries})")
diff --git a/indra/llcommon/llallocator.cpp b/indra/llcommon/llallocator.cpp
index 6f6abefc67..87654b5b97 100644..100755
--- a/indra/llcommon/llallocator.cpp
+++ b/indra/llcommon/llallocator.cpp
@@ -27,7 +27,7 @@
#include "linden_common.h"
#include "llallocator.h"
-#if LL_USE_TCMALLOC
+#if (LL_USE_TCMALLOC && LL_USE_HEAP_PROFILER)
#include "google/heap-profiler.h"
#include "google/commandlineflags_public.h"
diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp
index 3b9758f996..afaf366668 100644..100755
--- a/indra/llcommon/llmemory.cpp
+++ b/indra/llcommon/llmemory.cpp
@@ -61,6 +61,18 @@ BOOL LLMemory::sEnableMemoryFailurePrevention = FALSE;
LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sMemAllocationTracker;
#endif
+void ll_assert_aligned_func(uintptr_t ptr,U32 alignment)
+{
+#ifdef SHOW_ASSERT
+ // Redundant, place to set breakpoints.
+ if (ptr%alignment!=0)
+ {
+ llwarns << "alignment check failed" << llendl;
+ }
+ llassert(ptr%alignment==0);
+#endif
+}
+
//static
void LLMemory::initClass()
{
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index bbbdaa6497..9dd776ff57 100644..100755
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -27,7 +27,6 @@
#define LLMEMORY_H
#include "llmemtype.h"
-#if LL_DEBUG
inline void* ll_aligned_malloc( size_t size, int align )
{
void* mem = malloc( size + (align - 1) + sizeof(void*) );
@@ -43,10 +42,11 @@ inline void ll_aligned_free( void* ptr )
free( ((void**)ptr)[-1] );
}
+#if !LL_USE_TCMALLOC
inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
{
#if defined(LL_WINDOWS)
- return _mm_malloc(size, 16);
+ return _aligned_malloc(size, 16);
#elif defined(LL_DARWIN)
return malloc(size); // default osx malloc is 16 byte aligned.
#else
@@ -58,21 +58,38 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi
#endif
}
+inline void* ll_aligned_realloc_16(void* ptr, size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
+{
+#if defined(LL_WINDOWS)
+ return _aligned_realloc(ptr, size, 16);
+#elif defined(LL_DARWIN)
+ return realloc(ptr,size); // default osx malloc is 16 byte aligned.
+#else
+ return realloc(ptr,size); // FIXME not guaranteed to be aligned.
+#endif
+}
+
inline void ll_aligned_free_16(void *p)
{
#if defined(LL_WINDOWS)
- _mm_free(p);
+ _aligned_free(p);
#elif defined(LL_DARWIN)
return free(p);
#else
free(p); // posix_memalign() is compatible with heap deallocator
#endif
}
+#else // USE_TCMALLOC
+// ll_aligned_foo_16 are not needed with tcmalloc
+#define ll_aligned_malloc_16 malloc
+#define ll_aligned_realloc_16 realloc
+#define ll_aligned_free_16 free
+#endif // USE_TCMALLOC
inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
{
#if defined(LL_WINDOWS)
- return _mm_malloc(size, 32);
+ return _aligned_malloc(size, 32);
#elif defined(LL_DARWIN)
return ll_aligned_malloc( size, 32 );
#else
@@ -87,22 +104,13 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi
inline void ll_aligned_free_32(void *p)
{
#if defined(LL_WINDOWS)
- _mm_free(p);
+ _aligned_free(p);
#elif defined(LL_DARWIN)
ll_aligned_free( p );
#else
free(p); // posix_memalign() is compatible with heap deallocator
#endif
}
-#else // LL_DEBUG
-// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals)
-#define ll_aligned_malloc( size, align ) malloc(size)
-#define ll_aligned_free( ptr ) free(ptr)
-#define ll_aligned_malloc_16 malloc
-#define ll_aligned_free_16 free
-#define ll_aligned_malloc_32 malloc
-#define ll_aligned_free_32 free
-#endif // LL_DEBUG
#ifndef __DEBUG_PRIVATE_MEM__
#define __DEBUG_PRIVATE_MEM__ 0
@@ -512,4 +520,13 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr)
// LLSingleton moved to llsingleton.h
+LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
+
+#ifdef SHOW_ASSERT
+#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
+#else
+#define ll_assert_aligned(ptr,alignment)
+#endif
+
+
#endif
diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt
index b5e59c1ca3..5865ae030c 100644..100755
--- a/indra/llmath/CMakeLists.txt
+++ b/indra/llmath/CMakeLists.txt
@@ -117,6 +117,7 @@ if (LL_TESTS)
# INTEGRATION TESTS
set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES})
# TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests.
+ LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}")
LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}")
LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}")
LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}")
diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h
index ec67b91d05..0b591be622 100644..100755
--- a/indra/llmath/llcamera.h
+++ b/indra/llmath/llcamera.h
@@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD;
// roll(), pitch(), yaw()
// etc...
-
+LL_ALIGN_PREFIX(16)
class LLCamera
: public LLCoordFrame
{
@@ -108,7 +108,7 @@ public:
};
private:
- LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP
+ LL_ALIGN_16(LLPlane mAgentPlanes[7]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP
U8 mPlaneMask[8]; // 8 for alignment
F32 mView; // angle between top and bottom frustum planes in radians.
@@ -116,13 +116,13 @@ private:
S32 mViewHeightInPixels; // for ViewHeightInPixels() only
F32 mNearPlane;
F32 mFarPlane;
- LLPlane mLocalPlanes[4];
+ LL_ALIGN_16(LLPlane mLocalPlanes[4]);
F32 mFixedDistance; // Always return this distance, unless < 0
LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test
F32 mFrustRadiusSquared;
- LLPlane mWorldPlanes[PLANE_NUM];
- LLPlane mHorizPlanes[HORIZ_PLANE_NUM];
+ LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]);
+ LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]);
U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in
@@ -208,7 +208,7 @@ protected:
void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom);
void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2);
void calculateWorldFrustumPlanes();
-};
+} LL_ALIGN_POSTFIX(16);
#endif
diff --git a/indra/llmath/llmatrix3a.cpp b/indra/llmath/llmatrix3a.cpp
index ab077abcb0..ab077abcb0 100644..100755
--- a/indra/llmath/llmatrix3a.cpp
+++ b/indra/llmath/llmatrix3a.cpp
diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h
index adb7e3389d..9916cfd2da 100644
--- a/indra/llmath/llmatrix3a.h
+++ b/indra/llmath/llmatrix3a.h
@@ -111,7 +111,7 @@ public:
protected:
- LLVector4a mColumns[3];
+ LL_ALIGN_16(LLVector4a mColumns[3]);
};
diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index 27cf5b79f6..c4cefdb4fa 100644..100755
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -34,7 +34,7 @@
class LLMatrix4a
{
public:
- LLVector4a mMatrix[4];
+ LL_ALIGN_16(LLVector4a mMatrix[4]);
inline void clear()
{
diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h
index 1b11e83b4a..6c7744cdf1 100644
--- a/indra/llmath/lloctree.h
+++ b/indra/llmath/lloctree.h
@@ -88,7 +88,7 @@ public:
typedef LLOctreeNode<T> oct_node;
typedef LLOctreeListener<T> oct_listener;
- /*void* operator new(size_t size)
+ void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
@@ -96,7 +96,7 @@ public:
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
- }*/
+ }
LLOctreeNode( const LLVector4a& center,
const LLVector4a& size,
diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h
index a611894721..3c32441b11 100644..100755
--- a/indra/llmath/llplane.h
+++ b/indra/llmath/llplane.h
@@ -36,6 +36,8 @@
// The plane normal = [A, B, C]
// The closest approach = D / sqrt(A*A + B*B + C*C)
+
+LL_ALIGN_PREFIX(16)
class LLPlane
{
public:
@@ -94,7 +96,7 @@ public:
private:
LLVector4a mV;
-};
+} LL_ALIGN_POSTFIX(16);
diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h
index c7cdf7b32c..01458521ec 100644..100755
--- a/indra/llmath/llsimdmath.h
+++ b/indra/llmath/llsimdmath.h
@@ -67,11 +67,10 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)
#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)
-
-
#include <xmmintrin.h>
#include <emmintrin.h>
+#include "llmemory.h"
#include "llsimdtypes.h"
#include "llsimdtypes.inl"
diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl
index 712239e425..e905c84954 100644
--- a/indra/llmath/llsimdtypes.inl
+++ b/indra/llmath/llsimdtypes.inl
@@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b)
inline LLSimdScalar operator-(const LLSimdScalar& a)
{
static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+ ll_assert_aligned(signMask,16);
return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a);
}
@@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs)
inline LLSimdScalar LLSimdScalar::getAbs() const
{
static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
+ ll_assert_aligned(F_ABS_MASK_4A,16);
return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A));
}
diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index b66b7a7076..6edeb0fefe 100644..100755
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -24,6 +24,7 @@
* $/LicenseInfo$
*/
+#include "llmemory.h"
#include "llmath.h"
#include "llquantize.h"
@@ -44,7 +45,10 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
assert(dst != NULL);
assert(bytes > 0);
assert((bytes % sizeof(F32))== 0);
-
+ ll_assert_aligned(src,16);
+ ll_assert_aligned(dst,16);
+ assert(bytes%16==0);
+
F32* end = dst + (bytes / sizeof(F32) );
if (bytes > 64)
@@ -189,6 +193,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high )
LLVector4a oneOverDelta;
{
static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f };
+ ll_assert_aligned(F_TWO_4A,16);
+
LLVector4a two; two.load4a( F_TWO_4A );
// Here we use _mm_rcp_ps plus one round of newton-raphson
diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 596082509d..0526793d3a 100644..100755
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -32,6 +32,7 @@ class LLRotation;
#include <assert.h>
#include "llpreprocessor.h"
+#include "llmemory.h"
///////////////////////////////////
// FIRST TIME USERS PLEASE READ
@@ -46,6 +47,7 @@ class LLRotation;
// LLVector3/LLVector4.
/////////////////////////////////
+LL_ALIGN_PREFIX(16)
class LLVector4a
{
public:
@@ -82,6 +84,7 @@ public:
}
// Copy words 16-byte blocks from src to dst. Source and destination must not overlap.
+ // Source and dest must be 16-byte aligned and size must be multiple of 16.
static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes);
////////////////////////////////////
@@ -90,6 +93,7 @@ public:
LLVector4a()
{ //DO NOT INITIALIZE -- The overhead is completely unnecessary
+ ll_assert_aligned(this,16);
}
LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f)
@@ -313,7 +317,7 @@ public:
private:
LLQuad mQ;
-};
+} LL_ALIGN_POSTFIX(16);
inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p)
{
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 7ad22a5631..7c52ffef21 100644
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -475,6 +475,7 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3
inline LLBool32 LLVector4a::isFinite3() const
{
static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+ ll_assert_aligned(nanOrInfMask,16);
const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask);
const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV );
const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV ));
diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h
index dd66b09d43..c5698f7cea 100644
--- a/indra/llmath/llvector4logical.h
+++ b/indra/llmath/llvector4logical.h
@@ -27,6 +27,7 @@
#ifndef LL_VECTOR4LOGICAL_H
#define LL_VECTOR4LOGICAL_H
+#include "llmemory.h"
////////////////////////////
// LLVector4Logical
@@ -77,6 +78,7 @@ public:
inline LLVector4Logical& invert()
{
static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+ ll_assert_aligned(allOnes,16);
mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) );
return *this;
}
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 0c6cf1dfae..5893caecc7 100644..100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -95,17 +95,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1;
extern BOOL gDebugGL;
-void assert_aligned(void* ptr, uintptr_t alignment)
-{
-#if 0
- uintptr_t t = (uintptr_t) ptr;
- if (t%alignment != 0)
- {
- llerrs << "Alignment check failed." << llendl;
- }
-#endif
-}
-
BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
{
LLVector3 test = (pt2-pt1)%(pt3-pt2);
@@ -6959,14 +6948,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts)
if (num_verts)
{
mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
- assert_aligned(mPositions, 16);
+ ll_assert_aligned(mPositions, 16);
mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
- assert_aligned(mNormals, 16);
+ ll_assert_aligned(mNormals, 16);
//pad texture coordinate block end to allow for QWORD reads
S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;
mTexCoords = (LLVector2*) ll_aligned_malloc_16(size);
- assert_aligned(mTexCoords, 16);
+ ll_assert_aligned(mTexCoords, 16);
}
else
{
@@ -6990,14 +6979,17 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
// S32 old_size = mNumVertices*16;
//positions
- mPositions = (LLVector4a*) realloc(mPositions, new_size);
+ mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size);
+ ll_assert_aligned(mPositions,16);
//normals
- mNormals = (LLVector4a*) realloc(mNormals, new_size);
-
+ mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size);
+ ll_assert_aligned(mNormals,16);
+
//tex coords
new_size = ((new_verts*8)+0xF) & ~0xF;
- mTexCoords = (LLVector2*) realloc(mTexCoords, new_size);
+ mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size);
+ ll_assert_aligned(mTexCoords,16);
//just clear binormals
@@ -7050,7 +7042,8 @@ void LLVolumeFace::pushIndex(const U16& idx)
S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;
if (new_size != old_size)
{
- mIndices = (U16*) realloc(mIndices, new_size);
+ mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size);
+ ll_assert_aligned(mIndices,16);
}
mIndices[mNumIndices++] = idx;
@@ -7091,12 +7084,12 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
}
//allocate new buffer space
- mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a));
- assert_aligned(mPositions, 16);
- mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a));
- assert_aligned(mNormals, 16);
- mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
- assert_aligned(mTexCoords, 16);
+ mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a));
+ ll_assert_aligned(mPositions, 16);
+ mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a));
+ ll_assert_aligned(mNormals, 16);
+ mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
+ ll_assert_aligned(mTexCoords, 16);
mNumVertices = new_count;
@@ -7142,7 +7135,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
new_count = mNumIndices + face.mNumIndices;
//allocate new index buffer
- mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
+ mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
//get destination address into new index buffer
U16* dst_idx = mIndices+mNumIndices;
diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h
index 688d91dc40..dac97b14b5 100644..100755
--- a/indra/llmath/llvolumeoctree.h
+++ b/indra/llmath/llvolumeoctree.h
@@ -37,6 +37,16 @@
class LLVolumeTriangle : public LLRefCount
{
public:
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
LLVolumeTriangle()
{
@@ -58,7 +68,7 @@ public:
}
- LLVector4a mPositionGroup;
+ LL_ALIGN_16(LLVector4a mPositionGroup);
const LLVector4a* mV[3];
U16 mIndex[3];
@@ -73,6 +83,16 @@ class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle>
{
public:
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node);
~LLVolumeOctreeListener();
@@ -99,8 +119,8 @@ public:
public:
- LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects)
- LLVector4a mExtents[2]; // extents (min, max) of this node and all its children
+ LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects)
+ LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children
};
class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle>
diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp
new file mode 100755
index 0000000000..8961b9d6d5
--- /dev/null
+++ b/indra/llmath/tests/alignment_test.cpp
@@ -0,0 +1,120 @@
+/**
+ * @file v3dmath_test.cpp
+ * @author Vir
+ * @date 2011-12
+ * @brief v3dmath test cases.
+ *
+ * $LicenseInfo:firstyear=2011&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2011, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+
+// Tests related to allocating objects with alignment constraints, particularly for SSE support.
+
+#include "linden_common.h"
+#include "../test/lltut.h"
+#include "../llmath.h"
+#include "../llsimdmath.h"
+#include "../llvector4a.h"
+
+void* operator new(size_t size)
+{
+ return ll_aligned_malloc_16(size);
+}
+
+void operator delete(void *p)
+{
+ ll_aligned_free_16(p);
+}
+
+namespace tut
+{
+
+#define is_aligned(ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr))%(alignment)==0)
+#define is_aligned_relative(ptr,base_ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr)-reinterpret_cast<uintptr_t>(base_ptr))%(alignment)==0)
+
+struct alignment_test {};
+
+typedef test_group<alignment_test> alignment_test_t;
+typedef alignment_test_t::object alignment_test_object_t;
+tut::alignment_test_t tut_alignment_test("LLAlignment");
+
+LL_ALIGN_PREFIX(16)
+class MyVector4a
+{
+ LLQuad mQ;
+} LL_ALIGN_POSTFIX(16);
+
+
+// Verify that aligned allocators perform as advertised.
+template<> template<>
+void alignment_test_object_t::test<1>()
+{
+ const int num_tests = 7;
+ void *align_ptr;
+ for (int i=0; i<num_tests; i++)
+ {
+ align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a));
+ ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16));
+
+ align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a));
+ ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16));
+
+ ll_aligned_free_16(align_ptr);
+
+ align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a));
+ ensure("ll_aligned_malloc_32 failed", is_aligned(align_ptr,32));
+ ll_aligned_free_32(align_ptr);
+ }
+}
+
+// In-place allocation of objects and arrays.
+template<> template<>
+void alignment_test_object_t::test<2>()
+{
+ MyVector4a vec1;
+ ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16));
+
+ MyVector4a veca[12];
+ ensure("LLAlignment veca unaligned", is_aligned(veca,16));
+}
+
+// Heap allocation of objects and arrays.
+template<> template<>
+void alignment_test_object_t::test<3>()
+{
+ const int ARR_SIZE = 7;
+ for(int i=0; i<ARR_SIZE; i++)
+ {
+ MyVector4a *vecp = new MyVector4a;
+ ensure("LLAlignment vecp unaligned", is_aligned(vecp,16));
+ delete vecp;
+ }
+
+ MyVector4a *veca = new MyVector4a[ARR_SIZE];
+ ensure("LLAligment veca base", is_aligned(veca,16));
+ for(int i=0; i<ARR_SIZE; i++)
+ {
+ std::cout << "veca[" << i << "]" << std::endl;
+ ensure("LLAlignment veca member unaligned", is_aligned(&veca[i],16));
+ }
+}
+
+}
diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp
index cb32a510b8..28ed051c55 100644
--- a/indra/llprimitive/llmodel.cpp
+++ b/indra/llprimitive/llmodel.cpp
@@ -1026,7 +1026,8 @@ void LLModel::setVolumeFaceData(
if (tc.get())
{
- LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), num_verts*2*sizeof(F32));
+ U32 tex_size = (num_verts*2*sizeof(F32)+0xF)&~0xF;
+ LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), tex_size);
}
else
{
diff --git a/indra/newview/CMakeLists.txt b/indra/newview/CMakeLists.txt
index f85b943c70..b12145d5f1 100644..100755
--- a/indra/newview/CMakeLists.txt
+++ b/indra/newview/CMakeLists.txt
@@ -1511,9 +1511,7 @@ set(PACKAGE ON CACHE BOOL
if (WINDOWS)
set_target_properties(${VIEWER_BINARY_NAME}
PROPERTIES
- # *TODO -reenable this once we get server usage sorted out
- #LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:\"__tcmalloc\""
- LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:__tcmalloc"
+ LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS ${TCMALLOC_LINK_FLAGS}"
LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO"
LINK_FLAGS_RELEASE ""
)
@@ -1532,7 +1530,7 @@ if (WINDOWS)
# In the meantime, if you have any ideas on how to easily maintain one list, either here or in viewer_manifest.py
# and have the build deps get tracked *please* tell me about it.
- if(USE_GOOGLE_PERFTOOLS)
+ if(USE_TCMALLOC)
# Configure a var for tcmalloc location, if used.
# Note the need to specify multiple names explicitly.
set(GOOGLE_PERF_TOOLS_SOURCE
@@ -1540,7 +1538,7 @@ if (WINDOWS)
${SHARED_LIB_STAGING_DIR}/RelWithDebInfo/libtcmalloc_minimal.dll
${SHARED_LIB_STAGING_DIR}/Debug/libtcmalloc_minimal-debug.dll
)
- endif(USE_GOOGLE_PERFTOOLS)
+ endif(USE_TCMALLOC)
set(COPY_INPUT_DEPENDENCIES
diff --git a/indra/newview/llappviewerwin32.cpp b/indra/newview/llappviewerwin32.cpp
index 647ace7ee3..8087453ee5 100644
--- a/indra/newview/llappviewerwin32.cpp
+++ b/indra/newview/llappviewerwin32.cpp
@@ -117,7 +117,7 @@ int APIENTRY WINMAIN(HINSTANCE hInstance,
// This results in a 2-3x improvement in opening a new Inventory window (which uses a large numebr of allocations)
// Note: This won't work when running from the debugger unless the _NO_DEBUG_HEAP environment variable is set to 1
- _CrtSetDbgFlag(0); // default, just making explicit
+ _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); // default, just making explicit
ULONG ulEnableLFH = 2;
HANDLE* hHeaps = new HANDLE[MAX_HEAPS];
diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h
index e268640a21..9a66490b03 100644..100755
--- a/indra/newview/lldrawable.h
+++ b/indra/newview/lldrawable.h
@@ -59,6 +59,7 @@ class LLViewerTexture;
const U32 SILHOUETTE_HIGHLIGHT = 0;
// All data for new renderer goes into this class.
+LL_ALIGN_PREFIX(16)
class LLDrawable : public LLRefCount
{
public:
@@ -75,6 +76,16 @@ public:
static void initClass();
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
LLDrawable() { init(); }
MEM_TYPE_NEW(LLMemType::MTYPE_DRAWABLE);
@@ -280,8 +291,8 @@ public:
} EDrawableFlags;
private: //aligned members
- LLVector4a mExtents[2];
- LLVector4a mPositionGroup;
+ LL_ALIGN_16(LLVector4a mExtents[2]);
+ LL_ALIGN_16(LLVector4a mPositionGroup);
public:
LLXformMatrix mXform;
@@ -322,7 +333,7 @@ private:
static U32 sNumZombieDrawables;
static LLDynamicArrayPtr<LLPointer<LLDrawable> > sDeadList;
-};
+} LL_ALIGN_POSTFIX(16);
inline LLFace* LLDrawable::getFace(const S32 i) const
diff --git a/indra/newview/lldynamictexture.h b/indra/newview/lldynamictexture.h
index e18090545d..c51e7d1e1a 100644..100755
--- a/indra/newview/lldynamictexture.h
+++ b/indra/newview/lldynamictexture.h
@@ -36,6 +36,16 @@
class LLViewerDynamicTexture : public LLViewerTexture
{
public:
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
enum
{
LL_VIEWER_DYNAMIC_TEXTURE = LLViewerTexture::DYNAMIC_TEXTURE,
@@ -85,7 +95,7 @@ protected:
protected:
BOOL mClamp;
LLCoordGL mOrigin;
- LLCamera mCamera;
+ LL_ALIGN_16(LLCamera mCamera);
typedef std::set<LLViewerDynamicTexture*> instance_list_t;
static instance_list_t sInstances[ LLViewerDynamicTexture::ORDER_COUNT ];
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 6dbeae6677..f60f311b9e 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -1377,7 +1377,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
{
if (!do_xform)
{
- LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
+ S32 tc_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF;
+ LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, tc_size);
}
else
{
diff --git a/indra/newview/llface.h b/indra/newview/llface.h
index 82e4ab61b7..feb6244f72 100644..100755
--- a/indra/newview/llface.h
+++ b/indra/newview/llface.h
@@ -59,6 +59,17 @@ class LLFace
{
public:
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
+
LLFace(const LLFace& rhs)
{
*this = rhs;
diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp
index 64bdcccd9f..8aa3b95578 100755
--- a/indra/newview/llfloatermodelpreview.cpp
+++ b/indra/newview/llfloatermodelpreview.cpp
@@ -4770,7 +4770,8 @@ void LLModelPreview::genBuffers(S32 lod, bool include_skin_weights)
if (vf.mTexCoords)
{
vb->getTexCoord0Strider(tc_strider);
- LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
+ S32 tex_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF;
+ LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, tex_size);
}
if (vf.mNormals)
diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp
index 450f9b2be7..0860506086 100644..100755
--- a/indra/newview/llpolymesh.cpp
+++ b/indra/newview/llpolymesh.cpp
@@ -129,22 +129,22 @@ void LLPolyMeshSharedData::freeMeshData()
{
mNumVertices = 0;
- delete [] mBaseCoords;
+ ll_aligned_free_16(mBaseCoords);
mBaseCoords = NULL;
- delete [] mBaseNormals;
+ ll_aligned_free_16(mBaseNormals);
mBaseNormals = NULL;
- delete [] mBaseBinormals;
+ ll_aligned_free_16(mBaseBinormals);
mBaseBinormals = NULL;
- delete [] mTexCoords;
+ ll_aligned_free_16(mTexCoords);
mTexCoords = NULL;
- delete [] mDetailTexCoords;
+ ll_aligned_free_16(mDetailTexCoords);
mDetailTexCoords = NULL;
- delete [] mWeights;
+ ll_aligned_free_16(mWeights);
mWeights = NULL;
}
@@ -229,12 +229,12 @@ U32 LLPolyMeshSharedData::getNumKB()
BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices )
{
U32 i;
- mBaseCoords = new LLVector3[ numVertices ];
- mBaseNormals = new LLVector3[ numVertices ];
- mBaseBinormals = new LLVector3[ numVertices ];
- mTexCoords = new LLVector2[ numVertices ];
- mDetailTexCoords = new LLVector2[ numVertices ];
- mWeights = new F32[ numVertices ];
+ mBaseCoords = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+ mBaseNormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+ mBaseBinormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+ mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2));
+ mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2));
+ mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32));
for (i = 0; i < numVertices; i++)
{
mWeights[i] = 0.f;
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index 900f126049..5a74cb2fb3 100644..100755
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -550,6 +550,7 @@ void LLSpatialGroup::setVisible()
void LLSpatialGroup::validate()
{
+ ll_assert_aligned(this,64);
#if LL_OCTREE_PARANOIA_CHECK
sg_assert(!isState(DIRTY));
@@ -1199,6 +1200,8 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) :
mCurUpdatingSlotp(NULL),
mCurUpdatingTexture (NULL)
{
+ ll_assert_aligned(this,16);
+
sNodeCount++;
LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION);
diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h
index 899547ae4d..efd6be2ccd 100644..100755
--- a/indra/newview/llspatialpartition.h
+++ b/indra/newview/llspatialpartition.h
@@ -68,6 +68,16 @@ protected:
~LLDrawInfo();
public:
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
LLDrawInfo(const LLDrawInfo& rhs)
{
@@ -106,7 +116,7 @@ public:
F32 mPartSize;
F32 mVSize;
LLSpatialGroup* mGroup;
- LLFace* mFace; //associated face
+ LL_ALIGN_16(LLFace* mFace); //associated face
F32 mDistance;
U32 mDrawMode;
@@ -181,7 +191,7 @@ public:
};
};
-LL_ALIGN_PREFIX(64)
+LL_ALIGN_PREFIX(16)
class LLSpatialGroup : public LLOctreeListener<LLDrawable>
{
friend class LLSpatialPartition;
@@ -193,6 +203,16 @@ public:
*this = rhs;
}
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
const LLSpatialGroup& operator=(const LLSpatialGroup& rhs)
{
llerrs << "Illegal operation!" << llendl;
@@ -372,12 +392,12 @@ public:
V4_COUNT = 10
} eV4Index;
- LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects)
- LLVector4a mExtents[2]; // extents (min, max) of this node and all its children
- LLVector4a mObjectExtents[2]; // extents (min, max) of objects in this node
- LLVector4a mObjectBounds[2]; // bounding box (center, size) of objects in this node
- LLVector4a mViewAngle;
- LLVector4a mLastUpdateViewAngle;
+ LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects)
+ LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children
+ LL_ALIGN_16(LLVector4a mObjectExtents[2]); // extents (min, max) of objects in this node
+ LL_ALIGN_16(LLVector4a mObjectBounds[2]); // bounding box (center, size) of objects in this node
+ LL_ALIGN_16(LLVector4a mViewAngle);
+ LL_ALIGN_16(LLVector4a mLastUpdateViewAngle);
private:
U32 mCurUpdatingTime ;
diff --git a/indra/newview/llviewercamera.h b/indra/newview/llviewercamera.h
index cc3395115b..d2624ba14e 100644..100755
--- a/indra/newview/llviewercamera.h
+++ b/indra/newview/llviewercamera.h
@@ -51,9 +51,19 @@ const BOOL NOT_FOR_SELECTION = FALSE;
extern template class LLViewerCamera* LLSingleton<class LLViewerCamera>::getInstance();
#endif
+LL_ALIGN_PREFIX(16)
class LLViewerCamera : public LLCamera, public LLSingleton<LLViewerCamera>
{
public:
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
typedef enum
{
@@ -137,6 +147,7 @@ protected:
S16 mZoomSubregion;
public:
-};
+} LL_ALIGN_POSTFIX(16);
+
#endif // LL_LLVIEWERCAMERA_H
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 76f4e18c27..d604687678 100644..100755
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -731,8 +731,10 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
F32* vw = (F32*) vertex_weightsp.get();
F32* cw = (F32*) clothing_weightsp.get();
- LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32));
- LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32));
+ S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF;
+ LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size);
+ S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF;
+ LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size);
LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32));
}
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index 68637a7ed9..f797767163 100644..100755
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -2595,7 +2595,7 @@ void LLVOAvatar::idleUpdateMisc(bool detailed_update)
if (isImpostor() && !mNeedsImpostorUpdate)
{
- LLVector4a ext[2];
+ LL_ALIGN_16(LLVector4a ext[2]);
F32 distance;
LLVector3 angle;
diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h
index 59796370ae..20ea3686d6 100644..100755
--- a/indra/newview/llvoavatar.h
+++ b/indra/newview/llvoavatar.h
@@ -90,6 +90,16 @@ protected:
**/
public:
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
LLVOAvatar(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp);
virtual void markDead();
static void initClass(); // Initialize data that's only init'd once per class.
@@ -212,7 +222,7 @@ public:
bool isBuilt() const { return mIsBuilt; }
private: //aligned members
- LLVector4a mImpostorExtents[2];
+ LL_ALIGN_16(LLVector4a mImpostorExtents[2]);
private:
BOOL mSupportsAlphaLayers; // For backwards compatibility, TRUE for 1.23+ clients
diff --git a/indra/newview/llvoavatarself.h b/indra/newview/llvoavatarself.h
index 74ff47a3e4..23a2025e70 100644..100755
--- a/indra/newview/llvoavatarself.h
+++ b/indra/newview/llvoavatarself.h
@@ -48,6 +48,16 @@ class LLVOAvatarSelf :
**/
public:
+ void* operator new(size_t size)
+ {
+ return ll_aligned_malloc_16(size);
+ }
+
+ void operator delete(void* ptr)
+ {
+ ll_aligned_free_16(ptr);
+ }
+
LLVOAvatarSelf(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp);
virtual ~LLVOAvatarSelf();
virtual void markDead();
diff --git a/indra/newview/viewer_manifest.py b/indra/newview/viewer_manifest.py
index 0931c4ec9b..43abae63f1 100644..100755
--- a/indra/newview/viewer_manifest.py
+++ b/indra/newview/viewer_manifest.py
@@ -1057,9 +1057,15 @@ class Linux_i686Manifest(LinuxManifest):
self.path("libopenal.so", "libopenal.so.1")
self.path("libopenal.so", "libvivoxoal.so.1") # vivox's sdk expects this soname
self.path("libfontconfig.so.1.4.4")
- self.path("libtcmalloc.so", "libtcmalloc.so") #formerly called google perf tools
- self.path("libtcmalloc.so.0", "libtcmalloc.so.0") #formerly called google perf tools
- self.path("libtcmalloc.so.0.1.0", "libtcmalloc.so.0.1.0") #formerly called google perf tools
+ try:
+ self.path("libtcmalloc.so", "libtcmalloc.so") #formerly called google perf tools
+ self.path("libtcmalloc.so.0", "libtcmalloc.so.0") #formerly called google perf tools
+ self.path("libtcmalloc.so.0.1.0", "libtcmalloc.so.0.1.0") #formerly called google perf tools
+ pass
+ except:
+ print "tcmalloc files not found, skipping"
+ pass
+
try:
self.path("libfmod-3.75.so")
pass