From a766e26db46c7c054bae1021470dbe365f2a3cb3 Mon Sep 17 00:00:00 2001 From: William Todd Stinson Date: Mon, 10 Sep 2012 10:37:02 -0700 Subject: Backing out the changes contributing to DRTVWR-167 and DRTVWR-179 from the repository. --- indra/llmath/CMakeLists.txt | 1 - indra/llmath/llcamera.h | 12 ++-- indra/llmath/llmatrix3a.h | 2 +- indra/llmath/llmatrix4a.h | 2 +- indra/llmath/lloctree.h | 147 +++++++++++----------------------------- indra/llmath/llplane.h | 4 +- indra/llmath/llsimdmath.h | 3 +- indra/llmath/llsimdtypes.inl | 2 - indra/llmath/llvector4a.cpp | 8 +-- indra/llmath/llvector4a.h | 6 +- indra/llmath/llvector4a.inl | 1 - indra/llmath/llvector4logical.h | 2 - indra/llmath/llvolume.cpp | 53 ++++++++------- indra/llmath/llvolumeoctree.cpp | 6 +- indra/llmath/llvolumeoctree.h | 35 ++-------- 15 files changed, 89 insertions(+), 195 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index 5865ae030c..b5e59c1ca3 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -117,7 +117,6 @@ if (LL_TESTS) # INTEGRATION TESTS set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES}) # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests. - LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}") LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}") diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index 0b591be622..ec67b91d05 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; // roll(), pitch(), yaw() // etc... -LL_ALIGN_PREFIX(16) + class LLCamera : public LLCoordFrame { @@ -108,7 +108,7 @@ public: }; private: - LL_ALIGN_16(LLPlane mAgentPlanes[7]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP U8 mPlaneMask[8]; // 8 for alignment F32 mView; // angle between top and bottom frustum planes in radians. @@ -116,13 +116,13 @@ private: S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - LL_ALIGN_16(LLPlane mLocalPlanes[4]); + LLPlane mLocalPlanes[4]; F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); - LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); + LLPlane mWorldPlanes[PLANE_NUM]; + LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -208,7 +208,7 @@ protected: void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); void calculateWorldFrustumPlanes(); -} LL_ALIGN_POSTFIX(16); +}; #endif diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index 9916cfd2da..adb7e3389d 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public: protected: - LL_ALIGN_16(LLVector4a mColumns[3]); + LLVector4a mColumns[3]; }; diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index c4cefdb4fa..27cf5b79f6 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@ class LLMatrix4a { public: - LL_ALIGN_16(LLVector4a mMatrix[4]); + LLVector4a mMatrix[4]; inline void clear() { diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index c3f6f7de2a..1b11e83b4a 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -31,6 +31,7 @@ #include "v3math.h" #include "llvector4a.h" #include +#include #define OCT_ERRS LL_WARNS("OctreeErrors") @@ -78,18 +79,16 @@ public: typedef LLOctreeTraveler oct_traveler; typedef LLTreeTraveler tree_traveler; - typedef LLPointer* element_list; - typedef LLPointer* element_iter; - typedef const LLPointer* const_element_iter; + typedef typename std::set > element_list; + typedef typename element_list::iterator element_iter; + typedef typename element_list::const_iterator const_element_iter; typedef typename std::vector*>::iterator tree_listener_iter; - typedef LLOctreeNode** child_list; - typedef LLOctreeNode** child_iter; - + typedef typename std::vector* > child_list; typedef LLTreeNode BaseType; typedef LLOctreeNode oct_node; typedef LLOctreeListener oct_listener; - void* operator new(size_t size) + /*void* operator new(size_t size) { return ll_aligned_malloc_16(size); } @@ -97,7 +96,7 @@ public: void operator delete(void* ptr) { ll_aligned_free_16(ptr); - } + }*/ LLOctreeNode( const LLVector4a& center, const LLVector4a& size, @@ -106,9 +105,6 @@ public: : mParent((oct_node*)parent), mOctant(octant) { - mData = NULL; - mDataEnd = NULL; - mCenter = center; mSize = size; @@ -127,16 +123,6 @@ public: { BaseType::destroyListeners(); - for (U32 i = 0; i < mElementCount; ++i) - { - mData[i]->setBinIndex(-1); - mData[i] = NULL; - } - - free(mData); - mData = NULL; - mDataEnd = NULL; - for (U32 i = 0; i < getChildCount(); i++) { delete getChild(i); @@ -233,17 +219,12 @@ public: } void accept(oct_traveler* visitor) { visitor->visit(this); } - virtual bool isLeaf() const { return mChildCount == 0; } + virtual bool isLeaf() const { return mChild.empty(); } U32 getElementCount() const { return mElementCount; } - bool isEmpty() const { return mElementCount == 0; } element_list& getData() { return mData; } const element_list& getData() const { return mData; } - element_iter getDataBegin() { return mData; } - element_iter getDataEnd() { return mDataEnd; } - const_element_iter getDataBegin() const { return mData; } - const_element_iter getDataEnd() const { return mDataEnd; } - + U32 getChildCount() const { return mChildCount; } oct_node* getChild(U32 index) { return mChild[index]; } const oct_node* getChild(U32 index) const { return mChild[index]; } @@ -308,7 +289,7 @@ public: virtual bool insert(T* data) { - if (data == NULL || data->getBinIndex() != -1) + if (data == NULL) { OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl; return false; @@ -321,16 +302,13 @@ public: if ((getElementCount() < gOctreeMaxCapacity && contains(data->getBinRadius()) || (data->getBinRadius() > getSize()[0] && parent && parent->getElementCount() >= gOctreeMaxCapacity))) { //it belongs here - mElementCount++; - mData = (element_list) realloc(mData, sizeof(LLPointer)*mElementCount); - - //avoid unref on uninitialized memory - memset(mData+mElementCount-1, 0, sizeof(LLPointer)); + //if this is a redundant insertion, error out (should never happen) + llassert(mData.find(data) == mData.end()); - mData[mElementCount-1] = data; - mDataEnd = mData + mElementCount; - data->setBinIndex(mElementCount-1); + mData.insert(data); BaseType::insert(data); + + mElementCount = mData.size(); return true; } else @@ -364,16 +342,10 @@ public: if( lt == 0x7 ) { - mElementCount++; - mData = (element_list) realloc(mData, sizeof(LLPointer)*mElementCount); - - //avoid unref on uninitialized memory - memset(mData+mElementCount-1, 0, sizeof(LLPointer)); - - mData[mElementCount-1] = data; - mDataEnd = mData + mElementCount; - data->setBinIndex(mElementCount-1); + mData.insert(data); BaseType::insert(data); + + mElementCount = mData.size(); return true; } @@ -422,59 +394,23 @@ public: return false; } - void _remove(T* data, S32 i) - { //precondition -- mElementCount > 0, idx is in range [0, mElementCount) - - mElementCount--; - data->setBinIndex(-1); - - if (mElementCount > 0) - { - if (mElementCount != i) - { - mData[i] = mData[mElementCount]; //might unref data, do not access data after this point - mData[i]->setBinIndex(i); - } - - mData[mElementCount] = NULL; //needed for unref - mData = (element_list) realloc(mData, sizeof(LLPointer)*mElementCount); - mDataEnd = mData+mElementCount; - } - else - { - mData[0] = NULL; //needed for unref - free(mData); - mData = NULL; - mDataEnd = NULL; - } - - notifyRemoval(data); - checkAlive(); - } - bool remove(T* data) { - S32 i = data->getBinIndex(); - - if (i >= 0 && i < mElementCount) - { - if (mData[i] == data) - { //found it - _remove(data, i); - llassert(data->getBinIndex() == -1); - return true; - } - } - - if (isInside(data)) + if (mData.find(data) != mData.end()) + { //we have data + mData.erase(data); + mElementCount = mData.size(); + notifyRemoval(data); + checkAlive(); + return true; + } + else if (isInside(data)) { oct_node* dest = getNodeAt(data); if (dest != this) { - bool ret = dest->remove(data); - llassert(data->getBinIndex() == -1); - return ret; + return dest->remove(data); } } @@ -493,20 +429,19 @@ public: //node is now root llwarns << "!!! OCTREE REMOVING FACE BY ADDRESS, SEVERE PERFORMANCE PENALTY |||" << llendl; node->removeByAddress(data); - llassert(data->getBinIndex() == -1); return true; } void removeByAddress(T* data) { - for (U32 i = 0; i < mElementCount; ++i) + if (mData.find(data) != mData.end()) { - if (mData[i] == data) - { //we have data - _remove(data, i); - llwarns << "FOUND!" << llendl; - return; - } + mData.erase(data); + mElementCount = mData.size(); + notifyRemoval(data); + llwarns << "FOUND!" << llendl; + checkAlive(); + return; } for (U32 i = 0; i < getChildCount(); i++) @@ -518,8 +453,8 @@ public: void clearChildren() { + mChild.clear(); mChildCount = 0; - U32* foo = (U32*) mChildMap; foo[0] = foo[1] = 0xFFFFFFFF; } @@ -581,7 +516,7 @@ public: mChildMap[child->getOctant()] = mChildCount; - mChild[mChildCount] = child; + mChild.push_back(child); ++mChildCount; child->setParent(this); @@ -608,12 +543,9 @@ public: mChild[index]->destroy(); delete mChild[index]; } - + mChild.erase(mChild.begin() + index); --mChildCount; - mChild[index] = mChild[mChildCount]; - - //rebuild child map U32* foo = (U32*) mChildMap; foo[0] = foo[1] = 0xFFFFFFFF; @@ -669,12 +601,11 @@ protected: oct_node* mParent; U8 mOctant; - LLOctreeNode* mChild[8]; + child_list mChild; U8 mChildMap[8]; U32 mChildCount; element_list mData; - element_iter mDataEnd; U32 mElementCount; }; diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index 3c32441b11..a611894721 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,8 +36,6 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) - -LL_ALIGN_PREFIX(16) class LLPlane { public: @@ -96,7 +94,7 @@ public: private: LLVector4a mV; -} LL_ALIGN_POSTFIX(16); +}; diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..c7cdf7b32c 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -67,10 +67,11 @@ template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) + + #include #include -#include "llmemory.h" #include "llsimdtypes.h" #include "llsimdtypes.inl" diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl index e905c84954..712239e425 100644 --- a/indra/llmath/llsimdtypes.inl +++ b/indra/llmath/llsimdtypes.inl @@ -62,7 +62,6 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) inline LLSimdScalar operator-(const LLSimdScalar& a) { static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; - ll_assert_aligned(signMask,16); return _mm_xor_ps(*reinterpret_cast(signMask), a); } @@ -147,7 +146,6 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) inline LLSimdScalar LLSimdScalar::getAbs() const { static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; - ll_assert_aligned(F_ABS_MASK_4A,16); return _mm_and_ps( mQ, *reinterpret_cast(F_ABS_MASK_4A)); } diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..b66b7a7076 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,7 +24,6 @@ * $/LicenseInfo$ */ -#include "llmemory.h" #include "llmath.h" #include "llquantize.h" @@ -45,10 +44,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F assert(dst != NULL); assert(bytes > 0); assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - + F32* end = dst + (bytes / sizeof(F32) ); if (bytes > 64) @@ -193,8 +189,6 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) LLVector4a oneOverDelta; { static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; - ll_assert_aligned(F_TWO_4A,16); - LLVector4a two; two.load4a( F_TWO_4A ); // Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 0526793d3a..596082509d 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,7 +32,6 @@ class LLRotation; #include #include "llpreprocessor.h" -#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -47,7 +46,6 @@ class LLRotation; // LLVector3/LLVector4. ///////////////////////////////// -LL_ALIGN_PREFIX(16) class LLVector4a { public: @@ -84,7 +82,6 @@ public: } // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. - // Source and dest must be 16-byte aligned and size must be multiple of 16. static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); //////////////////////////////////// @@ -93,7 +90,6 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary - ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) @@ -317,7 +313,7 @@ public: private: LLQuad mQ; -} LL_ALIGN_POSTFIX(16); +}; inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7c52ffef21..7ad22a5631 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -475,7 +475,6 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3 inline LLBool32 LLVector4a::isFinite3() const { static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; - ll_assert_aligned(nanOrInfMask,16); const __m128i nanOrInfMaskV = *reinterpret_cast (nanOrInfMask); const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index c5698f7cea..dd66b09d43 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,7 +27,6 @@ #ifndef LL_VECTOR4LOGICAL_H #define LL_VECTOR4LOGICAL_H -#include "llmemory.h" //////////////////////////// // LLVector4Logical @@ -78,7 +77,6 @@ public: inline LLVector4Logical& invert() { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; - ll_assert_aligned(allOnes,16); mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); return *this; } diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 53d56e96da..cc9744756f 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -95,6 +95,17 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; +void assert_aligned(void* ptr, uintptr_t alignment) +{ +#if 0 + uintptr_t t = (uintptr_t) ptr; + if (t%alignment != 0) + { + llerrs << "Alignment check failed." << llendl; + } +#endif +} + BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -317,16 +328,16 @@ public: LLVector4a& min = node->mExtents[0]; LLVector4a& max = node->mExtents[1]; - if (!branch->isEmpty()) + if (!branch->getData().empty()) { //node has data, find AABB that binds data set - const LLVolumeTriangle* tri = *(branch->getDataBegin()); + const LLVolumeTriangle* tri = *(branch->getData().begin()); //initialize min/max to first available vertex min = *(tri->mV[0]); max = *(tri->mV[0]); for (LLOctreeNode::const_element_iter iter = - branch->getDataBegin(); iter != branch->getDataEnd(); ++iter) + branch->getData().begin(); iter != branch->getData().end(); ++iter) { //for each triangle in node //stretch by triangles in node @@ -341,7 +352,7 @@ public: max.setMax(max, *tri->mV[2]); } } - else if (!branch->isLeaf()) + else if (!branch->getChildren().empty()) { //no data, but child nodes exist LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0); @@ -6951,14 +6962,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts) if (num_verts) { mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mPositions, 16); + assert_aligned(mPositions, 16); mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mNormals, 16); + assert_aligned(mNormals, 16); //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - ll_assert_aligned(mTexCoords, 16); + assert_aligned(mTexCoords, 16); } else { @@ -6982,17 +6993,14 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con // S32 old_size = mNumVertices*16; //positions - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size); - ll_assert_aligned(mPositions,16); + mPositions = (LLVector4a*) realloc(mPositions, new_size); //normals - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size); - ll_assert_aligned(mNormals,16); - + mNormals = (LLVector4a*) realloc(mNormals, new_size); + //tex coords new_size = ((new_verts*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size); - ll_assert_aligned(mTexCoords,16); + mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); //just clear binormals @@ -7045,8 +7053,7 @@ void LLVolumeFace::pushIndex(const U16& idx) S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; if (new_size != old_size) { - mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size); - ll_assert_aligned(mIndices,16); + mIndices = (U16*) realloc(mIndices, new_size); } mIndices[mNumIndices++] = idx; @@ -7087,12 +7094,12 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat } //allocate new buffer space - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a)); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a)); - ll_assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); - ll_assert_aligned(mTexCoords, 16); + mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); + assert_aligned(mPositions, 16); + mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); + assert_aligned(mNormals, 16); + mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); + assert_aligned(mTexCoords, 16); mNumVertices = new_count; @@ -7138,7 +7145,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat new_count = mNumIndices + face.mNumIndices; //allocate new index buffer - mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); //get destination address into new index buffer U16* dst_idx = mIndices+mNumIndices; diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp index cc83cb7235..b5a935c2b5 100644 --- a/indra/llmath/llvolumeoctree.cpp +++ b/indra/llmath/llvolumeoctree.cpp @@ -131,7 +131,7 @@ void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode* node) { for (LLOctreeNode::const_element_iter iter = - node->getDataBegin(); iter != node->getDataEnd(); ++iter) + node->getData().begin(); iter != node->getData().end(); ++iter) { const LLVolumeTriangle* tri = *iter; @@ -236,8 +236,8 @@ void LLVolumeOctreeValidate::visit(const LLOctreeNode* branch) } //children fit, check data - for (LLOctreeNode::const_element_iter iter = branch->getDataBegin(); - iter != branch->getDataEnd(); ++iter) + for (LLOctreeNode::const_element_iter iter = branch->getData().begin(); + iter != branch->getData().end(); ++iter) { const LLVolumeTriangle* tri = *iter; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 9ae34a0c4e..688d91dc40 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -37,19 +37,9 @@ class LLVolumeTriangle : public LLRefCount { public: - void* operator new(size_t size) - { - return ll_aligned_malloc_16(size); - } - - void operator delete(void* ptr) - { - ll_aligned_free_16(ptr); - } - LLVolumeTriangle() { - mBinIndex = -1; + } LLVolumeTriangle(const LLVolumeTriangle& rhs) @@ -68,38 +58,21 @@ public: } - LL_ALIGN_16(LLVector4a mPositionGroup); + LLVector4a mPositionGroup; const LLVector4a* mV[3]; U16 mIndex[3]; F32 mRadius; - mutable S32 mBinIndex; - virtual const LLVector4a& getPositionGroup() const; virtual const F32& getBinRadius() const; - - S32 getBinIndex() const { return mBinIndex; } - void setBinIndex(S32 idx) const { mBinIndex = idx; } - - }; class LLVolumeOctreeListener : public LLOctreeListener { public: - void* operator new(size_t size) - { - return ll_aligned_malloc_16(size); - } - - void operator delete(void* ptr) - { - ll_aligned_free_16(ptr); - } - LLVolumeOctreeListener(LLOctreeNode* node); ~LLVolumeOctreeListener(); @@ -126,8 +99,8 @@ public: public: - LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) - LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children + LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) + LLVector4a mExtents[2]; // extents (min, max) of this node and all its children }; class LLOctreeTriangleRayIntersect : public LLOctreeTraveler -- cgit v1.2.3 From 97d969a338c1e4f973eb817ba7701aff51a02ccb Mon Sep 17 00:00:00 2001 From: Oz Linden Date: Wed, 12 Sep 2012 14:36:37 -0400 Subject: initial attempt to restore changes that make removing tcmalloc possible; not tested --- indra/llmath/CMakeLists.txt | 1 + indra/llmath/llcamera.h | 12 +++++------ indra/llmath/llmatrix3a.h | 2 +- indra/llmath/llmatrix4a.h | 2 +- indra/llmath/lloctree.h | 4 ++-- indra/llmath/llplane.h | 4 +++- indra/llmath/llsimdmath.h | 3 +-- indra/llmath/llsimdtypes.inl | 2 ++ indra/llmath/llvector4a.cpp | 8 +++++++- indra/llmath/llvector4a.h | 6 +++++- indra/llmath/llvector4a.inl | 1 + indra/llmath/llvector4logical.h | 2 ++ indra/llmath/llvolume.cpp | 45 +++++++++++++++++------------------------ indra/llmath/llvolumeoctree.h | 26 +++++++++++++++++++++--- 14 files changed, 74 insertions(+), 44 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index b5e59c1ca3..5865ae030c 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -117,6 +117,7 @@ if (LL_TESTS) # INTEGRATION TESTS set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES}) # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests. + LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}") LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}") diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index ec67b91d05..0b591be622 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; // roll(), pitch(), yaw() // etc... - +LL_ALIGN_PREFIX(16) class LLCamera : public LLCoordFrame { @@ -108,7 +108,7 @@ public: }; private: - LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + LL_ALIGN_16(LLPlane mAgentPlanes[7]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP U8 mPlaneMask[8]; // 8 for alignment F32 mView; // angle between top and bottom frustum planes in radians. @@ -116,13 +116,13 @@ private: S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - LLPlane mLocalPlanes[4]; + LL_ALIGN_16(LLPlane mLocalPlanes[4]); F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LLPlane mWorldPlanes[PLANE_NUM]; - LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; + LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); + LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -208,7 +208,7 @@ protected: void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); void calculateWorldFrustumPlanes(); -}; +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index adb7e3389d..9916cfd2da 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public: protected: - LLVector4a mColumns[3]; + LL_ALIGN_16(LLVector4a mColumns[3]); }; diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 27cf5b79f6..c4cefdb4fa 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@ class LLMatrix4a { public: - LLVector4a mMatrix[4]; + LL_ALIGN_16(LLVector4a mMatrix[4]); inline void clear() { diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 1b11e83b4a..6c7744cdf1 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -88,7 +88,7 @@ public: typedef LLOctreeNode oct_node; typedef LLOctreeListener oct_listener; - /*void* operator new(size_t size) + void* operator new(size_t size) { return ll_aligned_malloc_16(size); } @@ -96,7 +96,7 @@ public: void operator delete(void* ptr) { ll_aligned_free_16(ptr); - }*/ + } LLOctreeNode( const LLVector4a& center, const LLVector4a& size, diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index a611894721..3c32441b11 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,6 +36,8 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) + +LL_ALIGN_PREFIX(16) class LLPlane { public: @@ -94,7 +96,7 @@ public: private: LLVector4a mV; -}; +} LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index c7cdf7b32c..01458521ec 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -67,11 +67,10 @@ template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - - #include #include +#include "llmemory.h" #include "llsimdtypes.h" #include "llsimdtypes.inl" diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl index 712239e425..e905c84954 100644 --- a/indra/llmath/llsimdtypes.inl +++ b/indra/llmath/llsimdtypes.inl @@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) inline LLSimdScalar operator-(const LLSimdScalar& a) { static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + ll_assert_aligned(signMask,16); return _mm_xor_ps(*reinterpret_cast(signMask), a); } @@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) inline LLSimdScalar LLSimdScalar::getAbs() const { static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + ll_assert_aligned(F_ABS_MASK_4A,16); return _mm_and_ps( mQ, *reinterpret_cast(F_ABS_MASK_4A)); } diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b66b7a7076..6edeb0fefe 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,6 +24,7 @@ * $/LicenseInfo$ */ +#include "llmemory.h" #include "llmath.h" #include "llquantize.h" @@ -44,7 +45,10 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F assert(dst != NULL); assert(bytes > 0); assert((bytes % sizeof(F32))== 0); - + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert(bytes%16==0); + F32* end = dst + (bytes / sizeof(F32) ); if (bytes > 64) @@ -189,6 +193,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) LLVector4a oneOverDelta; { static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; + ll_assert_aligned(F_TWO_4A,16); + LLVector4a two; two.load4a( F_TWO_4A ); // Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509d..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation; #include #include "llpreprocessor.h" +#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -46,6 +47,7 @@ class LLRotation; // LLVector3/LLVector4. ///////////////////////////////// +LL_ALIGN_PREFIX(16) class LLVector4a { public: @@ -82,6 +84,7 @@ public: } // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. + // Source and dest must be 16-byte aligned and size must be multiple of 16. static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); //////////////////////////////////// @@ -90,6 +93,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary + ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) @@ -313,7 +317,7 @@ public: private: LLQuad mQ; -}; +} LL_ALIGN_POSTFIX(16); inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7ad22a5631..7c52ffef21 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -475,6 +475,7 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3 inline LLBool32 LLVector4a::isFinite3() const { static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + ll_assert_aligned(nanOrInfMask,16); const __m128i nanOrInfMaskV = *reinterpret_cast (nanOrInfMask); const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index dd66b09d43..c5698f7cea 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,6 +27,7 @@ #ifndef LL_VECTOR4LOGICAL_H #define LL_VECTOR4LOGICAL_H +#include "llmemory.h" //////////////////////////// // LLVector4Logical @@ -77,6 +78,7 @@ public: inline LLVector4Logical& invert() { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + ll_assert_aligned(allOnes,16); mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); return *this; } diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cc9744756f..11fa7080ce 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -95,17 +95,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; -void assert_aligned(void* ptr, uintptr_t alignment) -{ -#if 0 - uintptr_t t = (uintptr_t) ptr; - if (t%alignment != 0) - { - llerrs << "Alignment check failed." << llendl; - } -#endif -} - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -6962,14 +6951,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts) if (num_verts) { mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mPositions, 16); + ll_assert_aligned(mPositions, 16); mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mNormals, 16); + ll_assert_aligned(mNormals, 16); //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - assert_aligned(mTexCoords, 16); + ll_assert_aligned(mTexCoords, 16); } else { @@ -6993,14 +6982,17 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con // S32 old_size = mNumVertices*16; //positions - mPositions = (LLVector4a*) realloc(mPositions, new_size); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size); + ll_assert_aligned(mPositions,16); //normals - mNormals = (LLVector4a*) realloc(mNormals, new_size); - + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size); + ll_assert_aligned(mNormals,16); + //tex coords new_size = ((new_verts*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size); + ll_assert_aligned(mTexCoords,16); //just clear binormals @@ -7053,7 +7045,8 @@ void LLVolumeFace::pushIndex(const U16& idx) S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; if (new_size != old_size) { - mIndices = (U16*) realloc(mIndices, new_size); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size); + ll_assert_aligned(mIndices,16); } mIndices[mNumIndices++] = idx; @@ -7094,12 +7087,12 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat } //allocate new buffer space - mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); - assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); - assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); - assert_aligned(mTexCoords, 16); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a)); + ll_assert_aligned(mPositions, 16); + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a)); + ll_assert_aligned(mNormals, 16); + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); + ll_assert_aligned(mTexCoords, 16); mNumVertices = new_count; @@ -7145,7 +7138,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat new_count = mNumIndices + face.mNumIndices; //allocate new index buffer - mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); //get destination address into new index buffer U16* dst_idx = mIndices+mNumIndices; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 688d91dc40..dac97b14b5 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -37,6 +37,16 @@ class LLVolumeTriangle : public LLRefCount { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeTriangle() { @@ -58,7 +68,7 @@ public: } - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mPositionGroup); const LLVector4a* mV[3]; U16 mIndex[3]; @@ -73,6 +83,16 @@ class LLVolumeOctreeListener : public LLOctreeListener { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeOctreeListener(LLOctreeNode* node); ~LLVolumeOctreeListener(); @@ -99,8 +119,8 @@ public: public: - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children }; class LLOctreeTriangleRayIntersect : public LLOctreeTraveler -- cgit v1.2.3 From 9ec263d2f1f51b334949281393ef2446796f1404 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Wed, 12 Sep 2012 15:44:36 -0500 Subject: MAINT-1503 Minimal set of changes needed to safely disable tcmalloc --- indra/llmath/llvolume.cpp | 19 ++++++++++--------- indra/llmath/tests/alignment_test.cpp | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 11fa7080ce..ea09a3afe7 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -6979,19 +6979,20 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con { S32 new_verts = mNumVertices+1; S32 new_size = new_verts*16; -// S32 old_size = mNumVertices*16; + S32 old_size = mNumVertices*16; //positions - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); ll_assert_aligned(mPositions,16); //normals - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size); + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); ll_assert_aligned(mNormals,16); //tex coords new_size = ((new_verts*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size); + old_size = ((mNumVertices*8)+0xF) & ~0xF; + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); ll_assert_aligned(mTexCoords,16); @@ -7045,7 +7046,7 @@ void LLVolumeFace::pushIndex(const U16& idx) S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; if (new_size != old_size) { - mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size, old_size); ll_assert_aligned(mIndices,16); } @@ -7087,11 +7088,11 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat } //allocate new buffer space - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a)); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a)); + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); ll_assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); ll_assert_aligned(mTexCoords, 16); mNumVertices = new_count; @@ -7138,7 +7139,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat new_count = mNumIndices + face.mNumIndices; //allocate new index buffer - mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF, (mNumIndices*sizeof(U16)+0xF) & ~0xF); //get destination address into new index buffer U16* dst_idx = mIndices+mNumIndices; diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp index ac0c45ae6f..5c426c57c3 100644 --- a/indra/llmath/tests/alignment_test.cpp +++ b/indra/llmath/tests/alignment_test.cpp @@ -78,7 +78,7 @@ void alignment_test_object_t::test<1>() align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a)); ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16)); - align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a)); + align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a), sizeof(MyVector4a)); ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16)); ll_aligned_free_16(align_ptr); -- cgit v1.2.3 From 8f7bb8f6d58dfa1d0b160948b2ca790cd6d4ae5c Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 13 Sep 2012 20:31:22 -0500 Subject: Port over Simon's fix for alignment test --- indra/llmath/tests/alignment_test.cpp | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp index 5c426c57c3..9105b1c1fd 100644 --- a/indra/llmath/tests/alignment_test.cpp +++ b/indra/llmath/tests/alignment_test.cpp @@ -34,16 +34,6 @@ #include "../llsimdmath.h" #include "../llvector4a.h" -void* operator new(size_t size) -{ - return ll_aligned_malloc_16(size); -} - -void operator delete(void *p) -{ - ll_aligned_free_16(p); -} - namespace tut { @@ -59,6 +49,27 @@ tut::alignment_test_t tut_alignment_test("LLAlignment"); LL_ALIGN_PREFIX(16) class MyVector4a { +public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void *p) + { + ll_aligned_free_16(p); + } + + void* operator new[](size_t count) + { // try to allocate count bytes for an array + return ll_aligned_malloc_16(count); + } + + void operator delete[](void *p) + { + ll_aligned_free_16(p); + } + LLQuad mQ; } LL_ALIGN_POSTFIX(16); -- cgit v1.2.3 From cf98064700a736f73a6c21ce899b186919cbeb64 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 20 Sep 2012 09:48:55 -0400 Subject: reapply 52b6c9168974: MAINT-646 Factor std::set out of lloctree --- indra/llmath/lloctree.h | 125 ++++++++++++++++++++++++++++++---------- indra/llmath/llvolume.cpp | 6 +- indra/llmath/llvolumeoctree.cpp | 6 +- indra/llmath/llvolumeoctree.h | 9 ++- 4 files changed, 109 insertions(+), 37 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 6c7744cdf1..a7e176fd51 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -79,9 +79,9 @@ public: typedef LLOctreeTraveler oct_traveler; typedef LLTreeTraveler tree_traveler; - typedef typename std::set > element_list; - typedef typename element_list::iterator element_iter; - typedef typename element_list::const_iterator const_element_iter; + typedef LLPointer* element_list; + typedef LLPointer* element_iter; + typedef const LLPointer* const_element_iter; typedef typename std::vector*>::iterator tree_listener_iter; typedef typename std::vector* > child_list; typedef LLTreeNode BaseType; @@ -105,6 +105,9 @@ public: : mParent((oct_node*)parent), mOctant(octant) { + mData = NULL; + mDataEnd = NULL; + mCenter = center; mSize = size; @@ -123,6 +126,16 @@ public: { BaseType::destroyListeners(); + for (U32 i = 0; i < mElementCount; ++i) + { + mData[i]->setBinIndex(-1); + mData[i] = NULL; + } + + free(mData); + mData = NULL; + mDataEnd = NULL; + for (U32 i = 0; i < getChildCount(); i++) { delete getChild(i); @@ -222,9 +235,14 @@ public: virtual bool isLeaf() const { return mChild.empty(); } U32 getElementCount() const { return mElementCount; } + bool isEmpty() const { return mElementCount == 0; } element_list& getData() { return mData; } const element_list& getData() const { return mData; } - + element_iter getDataBegin() { return mData; } + element_iter getDataEnd() { return mDataEnd; } + const_element_iter getDataBegin() const { return mData; } + const_element_iter getDataEnd() const { return mDataEnd; } + U32 getChildCount() const { return mChildCount; } oct_node* getChild(U32 index) { return mChild[index]; } const oct_node* getChild(U32 index) const { return mChild[index]; } @@ -289,7 +307,7 @@ public: virtual bool insert(T* data) { - if (data == NULL) + if (data == NULL || data->getBinIndex() != -1) { OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl; return false; @@ -302,13 +320,16 @@ public: if ((getElementCount() < gOctreeMaxCapacity && contains(data->getBinRadius()) || (data->getBinRadius() > getSize()[0] && parent && parent->getElementCount() >= gOctreeMaxCapacity))) { //it belongs here - //if this is a redundant insertion, error out (should never happen) - llassert(mData.find(data) == mData.end()); + mElementCount++; + mData = (element_list) realloc(mData, sizeof(LLPointer)*mElementCount); - mData.insert(data); - BaseType::insert(data); + //avoid unref on uninitialized memory + memset(mData+mElementCount-1, 0, sizeof(LLPointer)); - mElementCount = mData.size(); + mData[mElementCount-1] = data; + mDataEnd = mData + mElementCount; + data->setBinIndex(mElementCount-1); + BaseType::insert(data); return true; } else @@ -342,10 +363,16 @@ public: if( lt == 0x7 ) { - mData.insert(data); - BaseType::insert(data); + mElementCount++; + mData = (element_list) realloc(mData, sizeof(LLPointer)*mElementCount); + + //avoid unref on uninitialized memory + memset(mData+mElementCount-1, 0, sizeof(LLPointer)); - mElementCount = mData.size(); + mData[mElementCount-1] = data; + mDataEnd = mData + mElementCount; + data->setBinIndex(mElementCount-1); + BaseType::insert(data); return true; } @@ -394,23 +421,59 @@ public: return false; } + void _remove(T* data, S32 i) + { //precondition -- mElementCount > 0, idx is in range [0, mElementCount) + + mElementCount--; + data->setBinIndex(-1); + + if (mElementCount > 0) + { + if (mElementCount != i) + { + mData[i] = mData[mElementCount]; //might unref data, do not access data after this point + mData[i]->setBinIndex(i); + } + + mData[mElementCount] = NULL; //needed for unref + mData = (element_list) realloc(mData, sizeof(LLPointer)*mElementCount); + mDataEnd = mData+mElementCount; + } + else + { + mData[0] = NULL; //needed for unref + free(mData); + mData = NULL; + mDataEnd = NULL; + } + + notifyRemoval(data); + checkAlive(); + } + bool remove(T* data) { - if (mData.find(data) != mData.end()) - { //we have data - mData.erase(data); - mElementCount = mData.size(); - notifyRemoval(data); - checkAlive(); - return true; - } - else if (isInside(data)) + S32 i = data->getBinIndex(); + + if (i >= 0 && i < mElementCount) + { + if (mData[i] == data) + { //found it + _remove(data, i); + llassert(data->getBinIndex() == -1); + return true; + } + } + + if (isInside(data)) { oct_node* dest = getNodeAt(data); if (dest != this) { - return dest->remove(data); + bool ret = dest->remove(data); + llassert(data->getBinIndex() == -1); + return ret; } } @@ -429,19 +492,20 @@ public: //node is now root llwarns << "!!! OCTREE REMOVING FACE BY ADDRESS, SEVERE PERFORMANCE PENALTY |||" << llendl; node->removeByAddress(data); + llassert(data->getBinIndex() == -1); return true; } void removeByAddress(T* data) { - if (mData.find(data) != mData.end()) + for (U32 i = 0; i < mElementCount; ++i) { - mData.erase(data); - mElementCount = mData.size(); - notifyRemoval(data); - llwarns << "FOUND!" << llendl; - checkAlive(); - return; + if (mData[i] == data) + { //we have data + _remove(data, i); + llwarns << "FOUND!" << llendl; + return; + } } for (U32 i = 0; i < getChildCount(); i++) @@ -606,6 +670,7 @@ protected: U32 mChildCount; element_list mData; + element_iter mDataEnd; U32 mElementCount; }; diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index ea09a3afe7..919079fa88 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -317,16 +317,16 @@ public: LLVector4a& min = node->mExtents[0]; LLVector4a& max = node->mExtents[1]; - if (!branch->getData().empty()) + if (!branch->isEmpty()) { //node has data, find AABB that binds data set - const LLVolumeTriangle* tri = *(branch->getData().begin()); + const LLVolumeTriangle* tri = *(branch->getDataBegin()); //initialize min/max to first available vertex min = *(tri->mV[0]); max = *(tri->mV[0]); for (LLOctreeNode::const_element_iter iter = - branch->getData().begin(); iter != branch->getData().end(); ++iter) + branch->getDataBegin(); iter != branch->getDataEnd(); ++iter) { //for each triangle in node //stretch by triangles in node diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp index b5a935c2b5..cc83cb7235 100644 --- a/indra/llmath/llvolumeoctree.cpp +++ b/indra/llmath/llvolumeoctree.cpp @@ -131,7 +131,7 @@ void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode* node) { for (LLOctreeNode::const_element_iter iter = - node->getData().begin(); iter != node->getData().end(); ++iter) + node->getDataBegin(); iter != node->getDataEnd(); ++iter) { const LLVolumeTriangle* tri = *iter; @@ -236,8 +236,8 @@ void LLVolumeOctreeValidate::visit(const LLOctreeNode* branch) } //children fit, check data - for (LLOctreeNode::const_element_iter iter = branch->getData().begin(); - iter != branch->getData().end(); ++iter) + for (LLOctreeNode::const_element_iter iter = branch->getDataBegin(); + iter != branch->getDataEnd(); ++iter) { const LLVolumeTriangle* tri = *iter; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index dac97b14b5..9ae34a0c4e 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -49,7 +49,7 @@ public: LLVolumeTriangle() { - + mBinIndex = -1; } LLVolumeTriangle(const LLVolumeTriangle& rhs) @@ -74,9 +74,16 @@ public: U16 mIndex[3]; F32 mRadius; + mutable S32 mBinIndex; + virtual const LLVector4a& getPositionGroup() const; virtual const F32& getBinRadius() const; + + S32 getBinIndex() const { return mBinIndex; } + void setBinIndex(S32 idx) const { mBinIndex = idx; } + + }; class LLVolumeOctreeListener : public LLOctreeListener -- cgit v1.2.3 From 4127f3e7fcc725dbbf500c52605c5b950073c828 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 20 Sep 2012 09:48:56 -0400 Subject: reapply efcec3eb374f: MAINT-646 Factor std::vector out of lloctree --- indra/llmath/lloctree.h | 18 +++++++++++------- indra/llmath/llvolume.cpp | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index a7e176fd51..c3f6f7de2a 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -31,7 +31,6 @@ #include "v3math.h" #include "llvector4a.h" #include -#include #define OCT_ERRS LL_WARNS("OctreeErrors") @@ -83,7 +82,9 @@ public: typedef LLPointer* element_iter; typedef const LLPointer* const_element_iter; typedef typename std::vector*>::iterator tree_listener_iter; - typedef typename std::vector* > child_list; + typedef LLOctreeNode** child_list; + typedef LLOctreeNode** child_iter; + typedef LLTreeNode BaseType; typedef LLOctreeNode oct_node; typedef LLOctreeListener oct_listener; @@ -232,7 +233,7 @@ public: } void accept(oct_traveler* visitor) { visitor->visit(this); } - virtual bool isLeaf() const { return mChild.empty(); } + virtual bool isLeaf() const { return mChildCount == 0; } U32 getElementCount() const { return mElementCount; } bool isEmpty() const { return mElementCount == 0; } @@ -517,8 +518,8 @@ public: void clearChildren() { - mChild.clear(); mChildCount = 0; + U32* foo = (U32*) mChildMap; foo[0] = foo[1] = 0xFFFFFFFF; } @@ -580,7 +581,7 @@ public: mChildMap[child->getOctant()] = mChildCount; - mChild.push_back(child); + mChild[mChildCount] = child; ++mChildCount; child->setParent(this); @@ -607,9 +608,12 @@ public: mChild[index]->destroy(); delete mChild[index]; } - mChild.erase(mChild.begin() + index); + --mChildCount; + mChild[index] = mChild[mChildCount]; + + //rebuild child map U32* foo = (U32*) mChildMap; foo[0] = foo[1] = 0xFFFFFFFF; @@ -665,7 +669,7 @@ protected: oct_node* mParent; U8 mOctant; - child_list mChild; + LLOctreeNode* mChild[8]; U8 mChildMap[8]; U32 mChildCount; diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 919079fa88..85ea14f9bc 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -341,7 +341,7 @@ public: max.setMax(max, *tri->mV[2]); } } - else if (!branch->getChildren().empty()) + else if (!branch->isLeaf()) { //no data, but child nodes exist LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0); -- cgit v1.2.3