diff options
40 files changed, 363 insertions, 153 deletions
@@ -71,35 +71,35 @@ b53a0576eec80614d7767ed72b40ed67aeff27c9 DRTVWR-38_2.5.2-release 9283d6d1d7eb71dfe4c330e7c9144857e7356bde DRTVWR-40_2.6.0-beta1 9e4641f4a7870c0f565a25a2971368d5a29516a1 2.6.0-beta2 9e4641f4a7870c0f565a25a2971368d5a29516a1 DRTVWR-41_2.6.0-beta2 -42f32494bac475d0737799346f6831558ae8bf5d 2.6.0-release -42f32494bac475d0737799346f6831558ae8bf5d DRTVWR-39_2.6.0-release c5bdef3aaa2744626aef3c217ce29e1900d357b3 2.6.1-beta1 c5bdef3aaa2744626aef3c217ce29e1900d357b3 2.6.1-start c5bdef3aaa2744626aef3c217ce29e1900d357b3 DRTVWR-43_2.6.1-beta1 -c9182ed77d427c759cfacf49a7b71a2e20d522aa 2.6.1-release -c9182ed77d427c759cfacf49a7b71a2e20d522aa DRTVWR-42_2.6.1-release 56b2778c743c2a964d82e1caf11084d76a87de2c 2.6.2-start d1203046bb653b763f835b04d184646949d8dd5c 2.6.2-beta1 d1203046bb653b763f835b04d184646949d8dd5c DRTVWR-45_2.6.2-beta1 -214180ad5714ce8392b82bbebcc92f4babd98300 2.6.2-release -214180ad5714ce8392b82bbebcc92f4babd98300 DRTVWR-44_2.6.2-release +42f32494bac475d0737799346f6831558ae8bf5d 2.6.0-release +42f32494bac475d0737799346f6831558ae8bf5d DRTVWR-39_2.6.0-release +c9182ed77d427c759cfacf49a7b71a2e20d522aa 2.6.1-release +c9182ed77d427c759cfacf49a7b71a2e20d522aa DRTVWR-42_2.6.1-release 52b2263ab28f0976c689fd0b76c55a9eb027cdbf end-of-develop.py ec32f1045e7c2644015245df3a9933620aa194b8 2.6.3-start d7fcefabdf32bb61a9ea6d6037c1bb26190a85bc 2.6.3-beta1 d7fcefabdf32bb61a9ea6d6037c1bb26190a85bc DRTVWR-47_2.6.3-beta1 0630e977504af5ea320c58d33cae4e1ddee793e9 2.6.3-beta2 0630e977504af5ea320c58d33cae4e1ddee793e9 DRTVWR-48_2.6.3-beta2 -8f2da1701c81a62352df2b8d413d27fb2cade9a6 2.6.3-release -8f2da1701c81a62352df2b8d413d27fb2cade9a6 DRTVWR-46_2.6.3-release 3178e311da3a8739a85363665006ea3c4610cad4 dons-headless-hackathon-work +214180ad5714ce8392b82bbebcc92f4babd98300 2.6.2-release +214180ad5714ce8392b82bbebcc92f4babd98300 DRTVWR-44_2.6.2-release 7db558aaa7c176f2022b3e9cfe38ac72f6d1fccd 2.6.5-beta1 7db558aaa7c176f2022b3e9cfe38ac72f6d1fccd DRTVWR-50_2.6.5-beta1 +8f2da1701c81a62352df2b8d413d27fb2cade9a6 2.6.3-release +8f2da1701c81a62352df2b8d413d27fb2cade9a6 DRTVWR-46_2.6.3-release 800cefce8d364ffdd2f383cbecb91294da3ea424 2.6.6-start bb1075286b3b147b1dae2e3d6b2d56f04ff03f35 2.6.6-beta1 bb1075286b3b147b1dae2e3d6b2d56f04ff03f35 DRTVWR-52_2.6.6-beta1 +5e349dbe9cc84ea5795af8aeb6d473a0af9d4953 2.6.8-start dac76a711da5f1489a01c1fa62ec97d99c25736d 2.6.6-release dac76a711da5f1489a01c1fa62ec97d99c25736d DRTVWR-51_2.6.6-release -5e349dbe9cc84ea5795af8aeb6d473a0af9d4953 2.6.8-start beafa8a9bd1d1b670b7523d865204dc4a4b38eef 2.6.8-beta1 beafa8a9bd1d1b670b7523d865204dc4a4b38eef DRTVWR-55_2.6.8-beta1 be2000b946f8cb3de5f44b2d419287d4c48ec4eb 2.6.8-release @@ -118,50 +118,50 @@ e67da2c6e3125966dd49eef98b36317afac1fcfe 2.6.9-start 9f79a6ed8fdcd2f3dac33ea6b3236eeb278dccfe 2.7.2-start e0dc8b741eaa27dcdfbc9e956bb2579b954d15eb 2.7.2-beta1 e0dc8b741eaa27dcdfbc9e956bb2579b954d15eb DRTVWR-63_2.7.2-beta1 +6a3e7e403bd19e45fdfc2fcc716867af3ab80861 2.7.3-start fe3a8e7973072ea62043c08b19b66626c1a720eb 2.7.1-release fe3a8e7973072ea62043c08b19b66626c1a720eb 2.7.2-release fe3a8e7973072ea62043c08b19b66626c1a720eb DRTVWR-60_2.7.1-release fe3a8e7973072ea62043c08b19b66626c1a720eb DRTVWR-62_2.7.2-release -6a3e7e403bd19e45fdfc2fcc716867af3ab80861 2.7.3-start 6af10678de4736222b2c3f7e010e984fb5b327de 2.7.4-start be963a4eef635542f9617d7f5fd22ba48fb71958 2.7.4-beta1 be963a4eef635542f9617d7f5fd22ba48fb71958 DRTVWR-67_2.7.4-beta1 -057f319dd8eccdf63a54d99686c68cdcb31b6abc 2.7.4-release -057f319dd8eccdf63a54d99686c68cdcb31b6abc DRTVWR-66_2.7.4-release 19a498fa62570f352d7d246f17e3c81cc1d82d8b 2.7.5-start 09984bfa6cae17e0f72d02b75c1b7393c65eecfc 2.7.5-beta1 09984bfa6cae17e0f72d02b75c1b7393c65eecfc DRTVWR-69_2.7.5-beta1 -6866d9df6efbd441c66451debd376d21211de39c 2.7.5-release -6866d9df6efbd441c66451debd376d21211de39c DRTVWR-68_2.7.5-release e1ed60913230dd64269a7f7fc52cbc6004f6d52c 2.8.0-beta1 e1ed60913230dd64269a7f7fc52cbc6004f6d52c 2.8.0-start e1ed60913230dd64269a7f7fc52cbc6004f6d52c DRTVWR-71_2.8.0-beta1 -493d9127ee50e84ba08a736a65a23ca86f7a5b01 2.8.0-release -493d9127ee50e84ba08a736a65a23ca86f7a5b01 DRTVWR-70_2.8.0-release +057f319dd8eccdf63a54d99686c68cdcb31b6abc 2.7.4-release +057f319dd8eccdf63a54d99686c68cdcb31b6abc DRTVWR-66_2.7.4-release +6866d9df6efbd441c66451debd376d21211de39c 2.7.5-release +6866d9df6efbd441c66451debd376d21211de39c DRTVWR-68_2.7.5-release 502f6a5deca9365ddae57db4f1e30172668e171e 2.8.1-start 2c7e459e0c883f8e406b932e41e60097e9ee077e 2.8.1-beta1 2c7e459e0c883f8e406b932e41e60097e9ee077e DRTVWR-73_2.8.1-beta1 +493d9127ee50e84ba08a736a65a23ca86f7a5b01 2.8.0-release +493d9127ee50e84ba08a736a65a23ca86f7a5b01 DRTVWR-70_2.8.0-release +54bc7823ad4e3a436fef79710f685a7372bbf795 2.8.2-start +ac0f1a132d35c02a58861d37cca75b0429ac9137 2.8.3-start 29e93d7e19991011bd12b5748142b11a5dcb4370 2.8.1-release 29e93d7e19991011bd12b5748142b11a5dcb4370 DRTVWR-72_2.8.1-release 4780e3bd2b3042f91be3426151f28c30d199bb3b 2.8.1-hotfix 4780e3bd2b3042f91be3426151f28c30d199bb3b DRTVWR-76_2.8.1-hotfix -54bc7823ad4e3a436fef79710f685a7372bbf795 2.8.2-start -ac0f1a132d35c02a58861d37cca75b0429ac9137 2.8.3-start 599677276b227357140dda35bea4a2c18e2e67b5 2.8.3-beta1 599677276b227357140dda35bea4a2c18e2e67b5 DRTVWR-75_2.8.3-beta1 -fb85792b84bf28428889c4cc966469d92e5dac4c 2.8.3-release -fb85792b84bf28428889c4cc966469d92e5dac4c DRTVWR-74_2.8.3-release 6b678ea52f90d5c14181661dcd2546e25bde483e 3.0.0-start b0be6ce3adfef3a014a2389d360539f8a86c5439 3.0.0-beta1 b0be6ce3adfef3a014a2389d360539f8a86c5439 DRTVWR-78_3.0.0-beta1 -1778f26b6d0ae762dec3ca37140f66620f2485d9 3.0.0-release -1778f26b6d0ae762dec3ca37140f66620f2485d9 DRTVWR-77_3.0.0-release +fb85792b84bf28428889c4cc966469d92e5dac4c 2.8.3-release +fb85792b84bf28428889c4cc966469d92e5dac4c DRTVWR-74_2.8.3-release 82a2079ffcb57ecb1b3849cb41376b443e1eb912 3.0.1-start 364fd63517fbacbbcb9129d096187171ba8c9e48 3.0.1-beta1 364fd63517fbacbbcb9129d096187171ba8c9e48 DRTVWR-81_3.0.1-beta1 f2412ecd6740803ea9452f1d17fd872e263a0df7 3.0.2-start 42784bf50fa01974bada2a1af3892ee09c93fcda 3.0.2-beta1 42784bf50fa01974bada2a1af3892ee09c93fcda DRTVWR-83_3.0.2-beta1 +1778f26b6d0ae762dec3ca37140f66620f2485d9 3.0.0-release +1778f26b6d0ae762dec3ca37140f66620f2485d9 DRTVWR-77_3.0.0-release e5c9af2d7980a99a71650be3a0cf7b2b3c3b897e 3.0.2-beta2 e5c9af2d7980a99a71650be3a0cf7b2b3c3b897e DRTVWR-86_3.0.2-beta2 b95ddac176ac944efdc85cbee94ac2e1eab44c79 3.0.3-start @@ -169,9 +169,9 @@ b95ddac176ac944efdc85cbee94ac2e1eab44c79 3.0.3-start 6694f3f062aa45f64ab391d25a3eb3d5eb1b0871 DRTVWR-85_3.0.3-beta1 61aa7974df089e8621fe9a4c69bcdefdb3cc208a 3.0.3-beta2 61aa7974df089e8621fe9a4c69bcdefdb3cc208a DRTVWR-89_3.0.3-beta2 +586907287be581817b2422b5137971b22d54ea48 3.0.4-start 0496d2f74043cf4e6058e76ac3db03d44cff42ce 3.0.3-release 0496d2f74043cf4e6058e76ac3db03d44cff42ce DRTVWR-84_3.0.3-release -586907287be581817b2422b5137971b22d54ea48 3.0.4-start 92a3aa04775438226399b19deee12ac3b5a62838 3.0.5-start c7282e59f374ee904bd793c3c444455e3399b0c5 3.1.0-start 2657fa785bbfac115852c41bd0adaff74c2ad5da 3.1.0-beta1 @@ -192,11 +192,11 @@ e440cd1dfbd128d7d5467019e497f7f803640ad6 DRTVWR-95_3.2.0-beta1 c4911ec8cd81e676dfd2af438b3e065407a94a7a 3.2.1-start 9e390d76807fa70d356b8716fb83b8ce42a629ef 3.2.1-beta1 9e390d76807fa70d356b8716fb83b8ce42a629ef DRTVWR-100_3.2.1-beta1 -a8c7030d6845186fac7c188be4323a0e887b4184 3.2.1-release -a8c7030d6845186fac7c188be4323a0e887b4184 DRTVWR-99_3.2.1-release 40b46edba007d15d0059c80864b708b99c1da368 3.2.2-start 523df3e67378541498d516d52af4402176a26bac 3.2.2-beta1 523df3e67378541498d516d52af4402176a26bac DRTVWR-102_3.2.2-beta1 +a8c7030d6845186fac7c188be4323a0e887b4184 3.2.1-release +a8c7030d6845186fac7c188be4323a0e887b4184 DRTVWR-99_3.2.1-release 80f3e30d8aa4d8f674a48bd742aaa6d8e9eae0b5 3.2.3-start 3fe994349fae64fc40874bb59db387131eb35a41 3.2.4-beta1 3fe994349fae64fc40874bb59db387131eb35a41 3.2.4-start @@ -278,10 +278,6 @@ a8057e1b9a1246b434a27405be35e030f7d28b0c 3.3.4-beta3 9cd174d3a54d93d409a7c346a15b8bfb40fc58f4 DRTVWR-184 ab2ffc547c8a8950ff187c4f6c95e5334fab597b 3.3.4-beta5 28e100d0379a2b0710c57647a28fc5239d3d7b99 3.3.4-release -6dfb0fba782c9233dd95f24ec48146db0d3f210b DRTVWR-199 -7c9102fb998885621919f2474a002c35b583539b 3.3.4-release2 -8c9085066c78ed5f6c9379dc054c82a6fcdb1851 DRTVWR-207 -351eea5f9dc192fc5ddea3b02958de97677a0a12 3.3.4-release3 005dfe5c4c377207d065fb27858d2eb0b53b143a DRTVWR-167 888768f162d2c0a8de1dcc5fb9a08bd8bd120a6b DRTVWR-175 4ad8a3afe40e0200309e3ada68932c4295ac2795 DRTVWR-179 @@ -298,14 +294,17 @@ ae5c83dd61d2d37c45f1d5b8bf2b036d87599f1b DRTVWR-198 b1dbb1a83f48f93f6f878cff9e52d2cb635e145c 3.4.0-beta2 37402e2b19af970d51b0a814d79892cc5647532b DRTVWR-200 182a9bf30e81070361bb020a78003b1cf398e79c 3.4.0-beta3 +6dfb0fba782c9233dd95f24ec48146db0d3f210b DRTVWR-199 +7c9102fb998885621919f2474a002c35b583539b 3.3.4-release2 7649a3dff5ec22d3727377e5f02efd0f421e4cb5 DRTVWR-201 84fb70dfe3444e75a44fb4bee43e2fc8221cebdd 3.4.0-beta4 573e863be2f26d3687161def4b9fea9b7038dda8 3.4.0-beta5 +8c9085066c78ed5f6c9379dc054c82a6fcdb1851 DRTVWR-207 +351eea5f9dc192fc5ddea3b02958de97677a0a12 3.3.4-release3 af7b28e75bd5a629cd9e0dc46fb3f1757626f493 DRTVWR-212 015012c2b740ccdec8a8c3d6e5f898449ecfe0b8 DRTVWR-213 62b07aa81b1957897c3846292bb9412977b0af6c 3.3.4-beta6 -d02759655d6b36d60f4a927e4bfce82844a82ef5 3.4.0-release +baa627938dbb7956f45e1eebef11ffe9e7f2e5bc 3.4.1-beta1 ceed0b65a69f1eac20d523e0203320a32f9a3f3c DRTVWR-215 733ceac77583874f3626ef7a15c105b83ef0f5bb 3.4.0-beta7 -d02759655d6b36d60f4a927e4bfce82844a82ef5 3.4.0-release 97977c67245f52db20eb15f1918cc0f24778cabc 3.4.0-release diff --git a/indra/cmake/Copy3rdPartyLibs.cmake b/indra/cmake/Copy3rdPartyLibs.cmake index 224e0a8b51..9f05c4cff2 100644 --- a/indra/cmake/Copy3rdPartyLibs.cmake +++ b/indra/cmake/Copy3rdPartyLibs.cmake @@ -57,10 +57,10 @@ if(WINDOWS) libhunspell.dll ) - if(USE_GOOGLE_PERFTOOLS) + if(USE_TCMALLOC) set(debug_files ${debug_files} libtcmalloc_minimal-debug.dll) set(release_files ${release_files} libtcmalloc_minimal.dll) - endif(USE_GOOGLE_PERFTOOLS) + endif(USE_TCMALLOC) if (FMOD) set(debug_files ${debug_files} fmod.dll) @@ -272,13 +272,16 @@ elseif(LINUX) libopenal.so libopenjpeg.so libssl.so - libtcmalloc_minimal.so libuuid.so.16 libuuid.so.16.0.22 libssl.so.1.0.0 libfontconfig.so.1.4.4 ) + if (USE_TCMALLOC) + set(release_files ${release_files} "libtcmalloc_minimal.so") + endif (USE_TCMALLOC) + if (FMOD) set(release_files ${release_files} "libfmod-3.75.so") endif (FMOD) diff --git a/indra/cmake/GooglePerfTools.cmake b/indra/cmake/GooglePerfTools.cmake index d9f91193be..73b3642ae6 100644 --- a/indra/cmake/GooglePerfTools.cmake +++ b/indra/cmake/GooglePerfTools.cmake @@ -1,20 +1,34 @@ # -*- cmake -*- include(Prebuilt) +# If you want to enable or disable TCMALLOC in viewer builds, this is the place. +# set ON or OFF as desired. +set (USE_TCMALLOC OFF) + if (STANDALONE) include(FindGooglePerfTools) else (STANDALONE) if (WINDOWS) - use_prebuilt_binary(tcmalloc) - set(TCMALLOC_LIBRARIES - debug libtcmalloc_minimal-debug - optimized libtcmalloc_minimal) + if (USE_TCMALLOC) + use_prebuilt_binary(tcmalloc) + set(TCMALLOC_LIBRARIES + debug libtcmalloc_minimal-debug + optimized libtcmalloc_minimal) + set(TCMALLOC_LINK_FLAGS "/INCLUDE:__tcmalloc") + else (USE_TCMALLOC) + set(TCMALLOC_LIBRARIES) + set(TCMALLOC_LINK_FLAGS) + endif (USE_TCMALLOC) set(GOOGLE_PERFTOOLS_FOUND "YES") endif (WINDOWS) if (LINUX) - use_prebuilt_binary(tcmalloc) - set(TCMALLOC_LIBRARIES - tcmalloc) + if (USE_TCMALLOC) + use_prebuilt_binary(tcmalloc) + set(TCMALLOC_LIBRARIES + tcmalloc) + else (USE_TCMALLOC) + set(TCMALLOC_LIBRARIES) + endif (USE_TCMALLOC) set(PROFILER_LIBRARIES profiler) set(GOOGLE_PERFTOOLS_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include) @@ -29,13 +43,19 @@ if (GOOGLE_PERFTOOLS_FOUND) endif (GOOGLE_PERFTOOLS_FOUND) if (WINDOWS) - set(USE_GOOGLE_PERFTOOLS ON) + set(USE_GOOGLE_PERFTOOLS ON) endif (WINDOWS) if (USE_GOOGLE_PERFTOOLS) - set(TCMALLOC_FLAG -ULL_USE_TCMALLOC=1) + if (USE_TCMALLOC) + set(TCMALLOC_FLAG -DLL_USE_TCMALLOC=1) + else (USE_TCMALLOC) + set(TCMALLOC_FLAG -ULL_USE_TCMALLOC) + endif (USE_TCMALLOC) +endif (USE_GOOGLE_PERFTOOLS) + +if (USE_GOOGLE_PERFTOOLS) include_directories(${GOOGLE_PERFTOOLS_INCLUDE_DIR}) set(GOOGLE_PERFTOOLS_LIBRARIES ${TCMALLOC_LIBRARIES} ${STACKTRACE_LIBRARIES} ${PROFILER_LIBRARIES}) else (USE_GOOGLE_PERFTOOLS) - set(TCMALLOC_FLAG -ULL_USE_TCMALLOC) endif (USE_GOOGLE_PERFTOOLS) diff --git a/indra/cmake/LLAddBuildTest.cmake b/indra/cmake/LLAddBuildTest.cmake index 03ce46781c..543075db5b 100755 --- a/indra/cmake/LLAddBuildTest.cmake +++ b/indra/cmake/LLAddBuildTest.cmake @@ -214,6 +214,15 @@ FUNCTION(LL_ADD_INTEGRATION_TEST SET_TARGET_PROPERTIES(INTEGRATION_TEST_${testname} PROPERTIES COMPILE_FLAGS -I"${TUT_INCLUDE_DIR}") endif(STANDALONE) + if (WINDOWS) + SET_TARGET_PROPERTIES(INTEGRATION_TEST_${testname} + PROPERTIES + LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS ${TCMALLOC_LINK_FLAGS}" + LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO" + LINK_FLAGS_RELEASE "" + ) + endif (WINDOWS) + # Add link deps to the executable if(TEST_DEBUG) message(STATUS "TARGET_LINK_LIBRARIES(INTEGRATION_TEST_${testname} ${libraries})") diff --git a/indra/llcommon/llallocator.cpp b/indra/llcommon/llallocator.cpp index 6f6abefc67..87654b5b97 100644 --- a/indra/llcommon/llallocator.cpp +++ b/indra/llcommon/llallocator.cpp @@ -27,7 +27,7 @@ #include "linden_common.h" #include "llallocator.h" -#if LL_USE_TCMALLOC +#if (LL_USE_TCMALLOC && LL_USE_HEAP_PROFILER) #include "google/heap-profiler.h" #include "google/commandlineflags_public.h" diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp index 3b9758f996..afaf366668 100644 --- a/indra/llcommon/llmemory.cpp +++ b/indra/llcommon/llmemory.cpp @@ -61,6 +61,18 @@ BOOL LLMemory::sEnableMemoryFailurePrevention = FALSE; LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sMemAllocationTracker; #endif +void ll_assert_aligned_func(uintptr_t ptr,U32 alignment) +{ +#ifdef SHOW_ASSERT + // Redundant, place to set breakpoints. + if (ptr%alignment!=0) + { + llwarns << "alignment check failed" << llendl; + } + llassert(ptr%alignment==0); +#endif +} + //static void LLMemory::initClass() { diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index bbbdaa6497..f37e8459ff 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -27,7 +27,6 @@ #define LLMEMORY_H #include "llmemtype.h" -#if LL_DEBUG inline void* ll_aligned_malloc( size_t size, int align ) { void* mem = malloc( size + (align - 1) + sizeof(void*) ); @@ -43,10 +42,11 @@ inline void ll_aligned_free( void* ptr ) free( ((void**)ptr)[-1] ); } +#if !LL_USE_TCMALLOC inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16(). { #if defined(LL_WINDOWS) - return _mm_malloc(size, 16); + return _aligned_malloc(size, 16); #elif defined(LL_DARWIN) return malloc(size); // default osx malloc is 16 byte aligned. #else @@ -61,7 +61,7 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi inline void ll_aligned_free_16(void *p) { #if defined(LL_WINDOWS) - _mm_free(p); + _aligned_free(p); #elif defined(LL_DARWIN) return free(p); #else @@ -69,10 +69,35 @@ inline void ll_aligned_free_16(void *p) #endif } +inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16(). +{ +#if defined(LL_WINDOWS) + return _aligned_realloc(ptr, size, 16); +#elif defined(LL_DARWIN) + return realloc(ptr,size); // default osx malloc is 16 byte aligned. +#else + //FIXME: memcpy is SLOW + void* ret = ll_aligned_malloc_16(size); + if (ptr) + { + memcpy(ret, ptr, old_size); + ll_aligned_free_16(ptr); + } + return ret; +#endif +} + +#else // USE_TCMALLOC +// ll_aligned_foo_16 are not needed with tcmalloc +#define ll_aligned_malloc_16 malloc +#define ll_aligned_realloc_16 realloc +#define ll_aligned_free_16 free +#endif // USE_TCMALLOC + inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). { #if defined(LL_WINDOWS) - return _mm_malloc(size, 32); + return _aligned_malloc(size, 32); #elif defined(LL_DARWIN) return ll_aligned_malloc( size, 32 ); #else @@ -87,22 +112,13 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi inline void ll_aligned_free_32(void *p) { #if defined(LL_WINDOWS) - _mm_free(p); + _aligned_free(p); #elif defined(LL_DARWIN) ll_aligned_free( p ); #else free(p); // posix_memalign() is compatible with heap deallocator #endif } -#else // LL_DEBUG -// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals) -#define ll_aligned_malloc( size, align ) malloc(size) -#define ll_aligned_free( ptr ) free(ptr) -#define ll_aligned_malloc_16 malloc -#define ll_aligned_free_16 free -#define ll_aligned_malloc_32 malloc -#define ll_aligned_free_32 free -#endif // LL_DEBUG #ifndef __DEBUG_PRIVATE_MEM__ #define __DEBUG_PRIVATE_MEM__ 0 @@ -512,4 +528,13 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr) // LLSingleton moved to llsingleton.h +LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); + +#ifdef SHOW_ASSERT +#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment)) +#else +#define ll_assert_aligned(ptr,alignment) +#endif + + #endif diff --git a/indra/llcommon/llversionviewer.h b/indra/llcommon/llversionviewer.h index 2038681905..bcc661a920 100644 --- a/indra/llcommon/llversionviewer.h +++ b/indra/llcommon/llversionviewer.h @@ -29,7 +29,7 @@ const S32 LL_VERSION_MAJOR = 3; const S32 LL_VERSION_MINOR = 4; -const S32 LL_VERSION_PATCH = 0; +const S32 LL_VERSION_PATCH = 1; const S32 LL_VERSION_BUILD = 0; const char * const LL_CHANNEL = "Second Life Developer"; diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index b5e59c1ca3..5865ae030c 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -117,6 +117,7 @@ if (LL_TESTS) # INTEGRATION TESTS set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES}) # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests. + LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}") LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}") diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index ec67b91d05..0b591be622 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; // roll(), pitch(), yaw() // etc... - +LL_ALIGN_PREFIX(16) class LLCamera : public LLCoordFrame { @@ -108,7 +108,7 @@ public: }; private: - LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + LL_ALIGN_16(LLPlane mAgentPlanes[7]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP U8 mPlaneMask[8]; // 8 for alignment F32 mView; // angle between top and bottom frustum planes in radians. @@ -116,13 +116,13 @@ private: S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - LLPlane mLocalPlanes[4]; + LL_ALIGN_16(LLPlane mLocalPlanes[4]); F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LLPlane mWorldPlanes[PLANE_NUM]; - LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; + LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); + LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -208,7 +208,7 @@ protected: void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); void calculateWorldFrustumPlanes(); -}; +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index adb7e3389d..9916cfd2da 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public: protected: - LLVector4a mColumns[3]; + LL_ALIGN_16(LLVector4a mColumns[3]); }; diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 27cf5b79f6..c4cefdb4fa 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@ class LLMatrix4a { public: - LLVector4a mMatrix[4]; + LL_ALIGN_16(LLVector4a mMatrix[4]); inline void clear() { diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 1b11e83b4a..6c7744cdf1 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -88,7 +88,7 @@ public: typedef LLOctreeNode<T> oct_node; typedef LLOctreeListener<T> oct_listener; - /*void* operator new(size_t size) + void* operator new(size_t size) { return ll_aligned_malloc_16(size); } @@ -96,7 +96,7 @@ public: void operator delete(void* ptr) { ll_aligned_free_16(ptr); - }*/ + } LLOctreeNode( const LLVector4a& center, const LLVector4a& size, diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index a611894721..3c32441b11 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,6 +36,8 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) + +LL_ALIGN_PREFIX(16) class LLPlane { public: @@ -94,7 +96,7 @@ public: private: LLVector4a mV; -}; +} LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index c7cdf7b32c..01458521ec 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -67,11 +67,10 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - - #include <xmmintrin.h> #include <emmintrin.h> +#include "llmemory.h" #include "llsimdtypes.h" #include "llsimdtypes.inl" diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl index 712239e425..e905c84954 100644 --- a/indra/llmath/llsimdtypes.inl +++ b/indra/llmath/llsimdtypes.inl @@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) inline LLSimdScalar operator-(const LLSimdScalar& a) { static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + ll_assert_aligned(signMask,16); return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a); } @@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) inline LLSimdScalar LLSimdScalar::getAbs() const { static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + ll_assert_aligned(F_ABS_MASK_4A,16); return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); } diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b66b7a7076..6edeb0fefe 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,6 +24,7 @@ * $/LicenseInfo$ */ +#include "llmemory.h" #include "llmath.h" #include "llquantize.h" @@ -44,7 +45,10 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F assert(dst != NULL); assert(bytes > 0); assert((bytes % sizeof(F32))== 0); - + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert(bytes%16==0); + F32* end = dst + (bytes / sizeof(F32) ); if (bytes > 64) @@ -189,6 +193,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) LLVector4a oneOverDelta; { static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; + ll_assert_aligned(F_TWO_4A,16); + LLVector4a two; two.load4a( F_TWO_4A ); // Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509d..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation; #include <assert.h> #include "llpreprocessor.h" +#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -46,6 +47,7 @@ class LLRotation; // LLVector3/LLVector4. ///////////////////////////////// +LL_ALIGN_PREFIX(16) class LLVector4a { public: @@ -82,6 +84,7 @@ public: } // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. + // Source and dest must be 16-byte aligned and size must be multiple of 16. static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); //////////////////////////////////// @@ -90,6 +93,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary + ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) @@ -313,7 +317,7 @@ public: private: LLQuad mQ; -}; +} LL_ALIGN_POSTFIX(16); inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7ad22a5631..7c52ffef21 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -475,6 +475,7 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3 inline LLBool32 LLVector4a::isFinite3() const { static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + ll_assert_aligned(nanOrInfMask,16); const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask); const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index dd66b09d43..c5698f7cea 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,6 +27,7 @@ #ifndef LL_VECTOR4LOGICAL_H #define LL_VECTOR4LOGICAL_H +#include "llmemory.h" //////////////////////////// // LLVector4Logical @@ -77,6 +78,7 @@ public: inline LLVector4Logical& invert() { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + ll_assert_aligned(allOnes,16); mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); return *this; } diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cc9744756f..ea09a3afe7 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -95,17 +95,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; -void assert_aligned(void* ptr, uintptr_t alignment) -{ -#if 0 - uintptr_t t = (uintptr_t) ptr; - if (t%alignment != 0) - { - llerrs << "Alignment check failed." << llendl; - } -#endif -} - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -6962,14 +6951,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts) if (num_verts) { mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mPositions, 16); + ll_assert_aligned(mPositions, 16); mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mNormals, 16); + ll_assert_aligned(mNormals, 16); //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - assert_aligned(mTexCoords, 16); + ll_assert_aligned(mTexCoords, 16); } else { @@ -6990,17 +6979,21 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con { S32 new_verts = mNumVertices+1; S32 new_size = new_verts*16; -// S32 old_size = mNumVertices*16; + S32 old_size = mNumVertices*16; //positions - mPositions = (LLVector4a*) realloc(mPositions, new_size); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); + ll_assert_aligned(mPositions,16); //normals - mNormals = (LLVector4a*) realloc(mNormals, new_size); - + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); + ll_assert_aligned(mNormals,16); + //tex coords new_size = ((new_verts*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); + old_size = ((mNumVertices*8)+0xF) & ~0xF; + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); + ll_assert_aligned(mTexCoords,16); //just clear binormals @@ -7053,7 +7046,8 @@ void LLVolumeFace::pushIndex(const U16& idx) S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; if (new_size != old_size) { - mIndices = (U16*) realloc(mIndices, new_size); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size, old_size); + ll_assert_aligned(mIndices,16); } mIndices[mNumIndices++] = idx; @@ -7094,12 +7088,12 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat } //allocate new buffer space - mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); - assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); - assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); - assert_aligned(mTexCoords, 16); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); + ll_assert_aligned(mPositions, 16); + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); + ll_assert_aligned(mNormals, 16); + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); + ll_assert_aligned(mTexCoords, 16); mNumVertices = new_count; @@ -7145,7 +7139,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat new_count = mNumIndices + face.mNumIndices; //allocate new index buffer - mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF, (mNumIndices*sizeof(U16)+0xF) & ~0xF); //get destination address into new index buffer U16* dst_idx = mIndices+mNumIndices; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 688d91dc40..dac97b14b5 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -37,6 +37,16 @@ class LLVolumeTriangle : public LLRefCount { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeTriangle() { @@ -58,7 +68,7 @@ public: } - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mPositionGroup); const LLVector4a* mV[3]; U16 mIndex[3]; @@ -73,6 +83,16 @@ class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle> { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node); ~LLVolumeOctreeListener(); @@ -99,8 +119,8 @@ public: public: - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children }; class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle> diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp index ac0c45ae6f..9105b1c1fd 100644 --- a/indra/llmath/tests/alignment_test.cpp +++ b/indra/llmath/tests/alignment_test.cpp @@ -34,16 +34,6 @@ #include "../llsimdmath.h" #include "../llvector4a.h" -void* operator new(size_t size) -{ - return ll_aligned_malloc_16(size); -} - -void operator delete(void *p) -{ - ll_aligned_free_16(p); -} - namespace tut { @@ -59,6 +49,27 @@ tut::alignment_test_t tut_alignment_test("LLAlignment"); LL_ALIGN_PREFIX(16) class MyVector4a { +public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void *p) + { + ll_aligned_free_16(p); + } + + void* operator new[](size_t count) + { // try to allocate count bytes for an array + return ll_aligned_malloc_16(count); + } + + void operator delete[](void *p) + { + ll_aligned_free_16(p); + } + LLQuad mQ; } LL_ALIGN_POSTFIX(16); @@ -78,7 +89,7 @@ void alignment_test_object_t::test<1>() align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a)); ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16)); - align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a)); + align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a), sizeof(MyVector4a)); ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16)); ll_aligned_free_16(align_ptr); diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index cb32a510b8..28ed051c55 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -1026,7 +1026,8 @@ void LLModel::setVolumeFaceData( if (tc.get()) { - LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), num_verts*2*sizeof(F32)); + U32 tex_size = (num_verts*2*sizeof(F32)+0xF)&~0xF; + LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), tex_size); } else { diff --git a/indra/newview/CMakeLists.txt b/indra/newview/CMakeLists.txt index c0a252637f..1809616042 100644..100755 --- a/indra/newview/CMakeLists.txt +++ b/indra/newview/CMakeLists.txt @@ -1565,8 +1565,7 @@ if (WINDOWS) set_target_properties(${VIEWER_BINARY_NAME} PROPERTIES # *TODO -reenable this once we get server usage sorted out - #LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:\"__tcmalloc\"" - LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:__tcmalloc " + LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS ${TCMALLOC_LINK_FLAGS}" LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO" LINK_FLAGS_RELEASE "/FORCE:MULTIPLE /MAP\"secondlife-bin.MAP\" /OPT:REF" ) @@ -1585,7 +1584,7 @@ if (WINDOWS) # In the meantime, if you have any ideas on how to easily maintain one list, either here or in viewer_manifest.py # and have the build deps get tracked *please* tell me about it. - if(USE_GOOGLE_PERFTOOLS) + if(USE_TCMALLOC) # Configure a var for tcmalloc location, if used. # Note the need to specify multiple names explicitly. set(GOOGLE_PERF_TOOLS_SOURCE @@ -1593,7 +1592,7 @@ if (WINDOWS) ${SHARED_LIB_STAGING_DIR}/RelWithDebInfo/libtcmalloc_minimal.dll ${SHARED_LIB_STAGING_DIR}/Debug/libtcmalloc_minimal-debug.dll ) - endif(USE_GOOGLE_PERFTOOLS) + endif(USE_TCMALLOC) set(COPY_INPUT_DEPENDENCIES diff --git a/indra/newview/llappviewerwin32.cpp b/indra/newview/llappviewerwin32.cpp index bad60a9757..53c77fa22e 100644 --- a/indra/newview/llappviewerwin32.cpp +++ b/indra/newview/llappviewerwin32.cpp @@ -130,6 +130,8 @@ int APIENTRY WINMAIN(HINSTANCE hInstance, // This results in a 2-3x improvement in opening a new Inventory window (which uses a large numebr of allocations) // Note: This won't work when running from the debugger unless the _NO_DEBUG_HEAP environment variable is set to 1 + // Enable to get mem debugging within visual studio. + //_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); _CrtSetDbgFlag(0); // default, just making explicit ULONG ulEnableLFH = 2; diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h index e2064b79f8..8c7db61502 100644 --- a/indra/newview/lldrawable.h +++ b/indra/newview/lldrawable.h @@ -59,6 +59,7 @@ class LLViewerTexture; const U32 SILHOUETTE_HIGHLIGHT = 0; // All data for new renderer goes into this class. +LL_ALIGN_PREFIX(16) class LLDrawable : public LLRefCount { public: @@ -75,6 +76,16 @@ public: static void initClass(); + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLDrawable() { init(); } MEM_TYPE_NEW(LLMemType::MTYPE_DRAWABLE); @@ -281,8 +292,8 @@ public: } EDrawableFlags; private: //aligned members - LLVector4a mExtents[2]; - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mExtents[2]); + LL_ALIGN_16(LLVector4a mPositionGroup); public: LLXformMatrix mXform; @@ -323,7 +334,7 @@ private: static U32 sNumZombieDrawables; static LLDynamicArrayPtr<LLPointer<LLDrawable> > sDeadList; -}; +} LL_ALIGN_POSTFIX(16); inline LLFace* LLDrawable::getFace(const S32 i) const diff --git a/indra/newview/lldynamictexture.h b/indra/newview/lldynamictexture.h index e18090545d..c51e7d1e1a 100644 --- a/indra/newview/lldynamictexture.h +++ b/indra/newview/lldynamictexture.h @@ -36,6 +36,16 @@ class LLViewerDynamicTexture : public LLViewerTexture { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + enum { LL_VIEWER_DYNAMIC_TEXTURE = LLViewerTexture::DYNAMIC_TEXTURE, @@ -85,7 +95,7 @@ protected: protected: BOOL mClamp; LLCoordGL mOrigin; - LLCamera mCamera; + LL_ALIGN_16(LLCamera mCamera); typedef std::set<LLViewerDynamicTexture*> instance_list_t; static instance_list_t sInstances[ LLViewerDynamicTexture::ORDER_COUNT ]; diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 373b1930f5..02a8d1244f 100644..100755 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -1651,7 +1651,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (!do_xform) { LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM); - LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32)); + S32 tc_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, tc_size); } else { diff --git a/indra/newview/llface.h b/indra/newview/llface.h index 76ea5c853a..5dca27487f 100644 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -59,6 +59,17 @@ class LLFace { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + + LLFace(const LLFace& rhs) { *this = rhs; diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp index 3fe535cbe8..a071f338ba 100755 --- a/indra/newview/llfloatermodelpreview.cpp +++ b/indra/newview/llfloatermodelpreview.cpp @@ -4774,7 +4774,8 @@ void LLModelPreview::genBuffers(S32 lod, bool include_skin_weights) if (vf.mTexCoords) { vb->getTexCoord0Strider(tc_strider); - LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32)); + S32 tex_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, tex_size); } if (vf.mNormals) diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index 450f9b2be7..0860506086 100644 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -129,22 +129,22 @@ void LLPolyMeshSharedData::freeMeshData() { mNumVertices = 0; - delete [] mBaseCoords; + ll_aligned_free_16(mBaseCoords); mBaseCoords = NULL; - delete [] mBaseNormals; + ll_aligned_free_16(mBaseNormals); mBaseNormals = NULL; - delete [] mBaseBinormals; + ll_aligned_free_16(mBaseBinormals); mBaseBinormals = NULL; - delete [] mTexCoords; + ll_aligned_free_16(mTexCoords); mTexCoords = NULL; - delete [] mDetailTexCoords; + ll_aligned_free_16(mDetailTexCoords); mDetailTexCoords = NULL; - delete [] mWeights; + ll_aligned_free_16(mWeights); mWeights = NULL; } @@ -229,12 +229,12 @@ U32 LLPolyMeshSharedData::getNumKB() BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices ) { U32 i; - mBaseCoords = new LLVector3[ numVertices ]; - mBaseNormals = new LLVector3[ numVertices ]; - mBaseBinormals = new LLVector3[ numVertices ]; - mTexCoords = new LLVector2[ numVertices ]; - mDetailTexCoords = new LLVector2[ numVertices ]; - mWeights = new F32[ numVertices ]; + mBaseCoords = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); + mBaseNormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); + mBaseBinormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3)); + mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); + mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2)); + mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32)); for (i = 0; i < numVertices; i++) { mWeights[i] = 0.f; diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 325a2d3004..45ef8f1a6d 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -529,6 +529,7 @@ void LLSpatialGroup::setVisible() void LLSpatialGroup::validate() { + ll_assert_aligned(this,64); #if LL_OCTREE_PARANOIA_CHECK sg_assert(!isState(DIRTY)); @@ -1195,6 +1196,8 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) : mCurUpdatingSlotp(NULL), mCurUpdatingTexture (NULL) { + ll_assert_aligned(this,16); + sNodeCount++; LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index f0e4f15a83..7968c28900 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -68,6 +68,16 @@ protected: ~LLDrawInfo(); public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLDrawInfo(const LLDrawInfo& rhs) { @@ -106,7 +116,7 @@ public: F32 mPartSize; F32 mVSize; LLSpatialGroup* mGroup; - LLFace* mFace; //associated face + LL_ALIGN_16(LLFace* mFace); //associated face F32 mDistance; U32 mDrawMode; @@ -181,7 +191,7 @@ public: }; }; -LL_ALIGN_PREFIX(64) +LL_ALIGN_PREFIX(16) class LLSpatialGroup : public LLOctreeListener<LLDrawable> { friend class LLSpatialPartition; @@ -193,6 +203,16 @@ public: *this = rhs; } + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + const LLSpatialGroup& operator=(const LLSpatialGroup& rhs) { llerrs << "Illegal operation!" << llendl; @@ -370,12 +390,12 @@ public: V4_COUNT = 10 } eV4Index; - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children - LLVector4a mObjectExtents[2]; // extents (min, max) of objects in this node - LLVector4a mObjectBounds[2]; // bounding box (center, size) of objects in this node - LLVector4a mViewAngle; - LLVector4a mLastUpdateViewAngle; + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mObjectExtents[2]); // extents (min, max) of objects in this node + LL_ALIGN_16(LLVector4a mObjectBounds[2]); // bounding box (center, size) of objects in this node + LL_ALIGN_16(LLVector4a mViewAngle); + LL_ALIGN_16(LLVector4a mLastUpdateViewAngle); F32 mObjectBoxSize; //cached mObjectBounds[1].getLength3() diff --git a/indra/newview/llviewercamera.h b/indra/newview/llviewercamera.h index 184033de42..b857c7fe89 100644 --- a/indra/newview/llviewercamera.h +++ b/indra/newview/llviewercamera.h @@ -51,9 +51,19 @@ const BOOL NOT_FOR_SELECTION = FALSE; extern template class LLViewerCamera* LLSingleton<class LLViewerCamera>::getInstance(); #endif +LL_ALIGN_PREFIX(16) class LLViewerCamera : public LLCamera, public LLSingleton<LLViewerCamera> { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } typedef enum { @@ -137,6 +147,7 @@ protected: S16 mZoomSubregion; public: -}; +} LL_ALIGN_POSTFIX(16); + #endif // LL_LLVIEWERCAMERA_H diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index f029ae5302..5d1aa870a3 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -729,8 +729,10 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w F32* vw = (F32*) vertex_weightsp.get(); F32* cw = (F32*) clothing_weightsp.get(); - LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32)); - LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32)); + S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size); + S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size); LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32)); } diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp index 33dc12c473..1b7009a5c2 100644 --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -2692,7 +2692,7 @@ void LLVOAvatar::idleUpdateMisc(bool detailed_update) if (isImpostor() && !mNeedsImpostorUpdate) { - LLVector4a ext[2]; + LL_ALIGN_16(LLVector4a ext[2]); F32 distance; LLVector3 angle; diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h index 6fb56a4c0b..4081a1408d 100644 --- a/indra/newview/llvoavatar.h +++ b/indra/newview/llvoavatar.h @@ -93,6 +93,16 @@ protected: **/ public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVOAvatar(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp); virtual void markDead(); static void initClass(); // Initialize data that's only init'd once per class. @@ -215,7 +225,7 @@ public: bool isBuilt() const { return mIsBuilt; } private: //aligned members - LLVector4a mImpostorExtents[2]; + LL_ALIGN_16(LLVector4a mImpostorExtents[2]); private: BOOL mSupportsAlphaLayers; // For backwards compatibility, TRUE for 1.23+ clients diff --git a/indra/newview/llvoavatarself.h b/indra/newview/llvoavatarself.h index 543891ca63..2b273e616c 100644 --- a/indra/newview/llvoavatarself.h +++ b/indra/newview/llvoavatarself.h @@ -49,6 +49,16 @@ class LLVOAvatarSelf : **/ public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVOAvatarSelf(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp); virtual ~LLVOAvatarSelf(); virtual void markDead(); diff --git a/indra/newview/viewer_manifest.py b/indra/newview/viewer_manifest.py index 7c6b5403e1..894d2f0925 100644 --- a/indra/newview/viewer_manifest.py +++ b/indra/newview/viewer_manifest.py @@ -1080,7 +1080,15 @@ class Linux_i686Manifest(LinuxManifest): # previous call did, without having to explicitly state the # version number. self.path("libfontconfig.so.*.*") - self.path("libtcmalloc.so*") #formerly called google perf tools + try: + self.path("libtcmalloc.so", "libtcmalloc.so") #formerly called google perf tools + self.path("libtcmalloc.so.0", "libtcmalloc.so.0") #formerly called google perf tools + self.path("libtcmalloc.so.0.1.0", "libtcmalloc.so.0.1.0") #formerly called google perf tools + pass + except: + print "tcmalloc files not found, skipping" + pass + try: self.path("libfmod-3.75.so") pass |