diff options
Diffstat (limited to 'indra/llmath')
-rw-r--r-- | indra/llmath/CMakeLists.txt | 1 | ||||
-rw-r--r-- | indra/llmath/llcalcparser.h | 2 | ||||
-rw-r--r-- | indra/llmath/llcamera.h | 12 | ||||
-rw-r--r-- | indra/llmath/llcoord.h | 117 | ||||
-rw-r--r-- | indra/llmath/llmath.h | 6 | ||||
-rw-r--r-- | indra/llmath/llmatrix3a.h | 2 | ||||
-rw-r--r-- | indra/llmath/llmatrix4a.h | 2 | ||||
-rw-r--r-- | indra/llmath/lloctree.h | 159 | ||||
-rw-r--r-- | indra/llmath/llplane.h | 4 | ||||
-rw-r--r-- | indra/llmath/llsimdmath.h | 3 | ||||
-rw-r--r-- | indra/llmath/llsimdtypes.inl | 2 | ||||
-rw-r--r-- | indra/llmath/llvector4a.cpp | 8 | ||||
-rw-r--r-- | indra/llmath/llvector4a.h | 6 | ||||
-rw-r--r-- | indra/llmath/llvector4a.inl | 1 | ||||
-rw-r--r-- | indra/llmath/llvector4logical.h | 2 | ||||
-rw-r--r-- | indra/llmath/llvolume.cpp | 436 | ||||
-rw-r--r-- | indra/llmath/llvolume.h | 18 | ||||
-rw-r--r-- | indra/llmath/llvolumemgr.cpp | 3 | ||||
-rw-r--r-- | indra/llmath/llvolumeoctree.cpp | 6 | ||||
-rw-r--r-- | indra/llmath/llvolumeoctree.h | 35 | ||||
-rw-r--r-- | indra/llmath/m4math.cpp | 38 | ||||
-rw-r--r-- | indra/llmath/tests/alignment_test.cpp | 141 | ||||
-rw-r--r-- | indra/llmath/v3color.h | 1 |
23 files changed, 515 insertions, 490 deletions
diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index b5e59c1ca3..5865ae030c 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -117,6 +117,7 @@ if (LL_TESTS) # INTEGRATION TESTS set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES}) # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests. + LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}") LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}") diff --git a/indra/llmath/llcalcparser.h b/indra/llmath/llcalcparser.h index bd9c8c2519..e0ad270266 100644 --- a/indra/llmath/llcalcparser.h +++ b/indra/llmath/llcalcparser.h @@ -174,7 +174,7 @@ private: F32 _log(const F32& a) const { return log(a); } F32 _exp(const F32& a) const { return exp(a); } F32 _fabs(const F32& a) const { return fabs(a); } - F32 _floor(const F32& a) const { return llfloor(a); } + F32 _floor(const F32& a) const { return (F32)llfloor(a); } F32 _ceil(const F32& a) const { return llceil(a); } F32 _atan2(const F32& a,const F32& b) const { return atan2(a,b); } diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index ec67b91d05..0b591be622 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; // roll(), pitch(), yaw() // etc... - +LL_ALIGN_PREFIX(16) class LLCamera : public LLCoordFrame { @@ -108,7 +108,7 @@ public: }; private: - LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + LL_ALIGN_16(LLPlane mAgentPlanes[7]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP U8 mPlaneMask[8]; // 8 for alignment F32 mView; // angle between top and bottom frustum planes in radians. @@ -116,13 +116,13 @@ private: S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - LLPlane mLocalPlanes[4]; + LL_ALIGN_16(LLPlane mLocalPlanes[4]); F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LLPlane mWorldPlanes[PLANE_NUM]; - LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; + LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); + LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -208,7 +208,7 @@ protected: void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); void calculateWorldFrustumPlanes(); -}; +} LL_ALIGN_POSTFIX(16); #endif diff --git a/indra/llmath/llcoord.h b/indra/llmath/llcoord.h index 706ad92787..9b76268afd 100644 --- a/indra/llmath/llcoord.h +++ b/indra/llmath/llcoord.h @@ -26,80 +26,87 @@ #ifndef LL_LLCOORD_H #define LL_LLCOORD_H +template<typename> class LLCoord; +struct LL_COORD_TYPE_GL; +struct LL_COORD_TYPE_WINDOW; +struct LL_COORD_TYPE_SCREEN; + +typedef LLCoord<LL_COORD_TYPE_GL> LLCoordGL; +typedef LLCoord<LL_COORD_TYPE_WINDOW> LLCoordWindow; +typedef LLCoord<LL_COORD_TYPE_SCREEN> LLCoordScreen; + +struct LLCoordCommon +{ + LLCoordCommon(S32 x, S32 y) : mX(x), mY(y) {} + LLCoordCommon() : mX(0), mY(0) {} + S32 mX; + S32 mY; +}; + // A two-dimensional pixel value -class LLCoord +template<typename COORD_FRAME> +class LLCoord : protected COORD_FRAME { public: - S32 mX; - S32 mY; + typedef LLCoord<COORD_FRAME> self_t; + typename COORD_FRAME::value_t mX; + typename COORD_FRAME::value_t mY; LLCoord(): mX(0), mY(0) {} - LLCoord(S32 x, S32 y): mX(x), mY(y) - {} - virtual ~LLCoord() + LLCoord(typename COORD_FRAME::value_t x, typename COORD_FRAME::value_t y): mX(x), mY(y) {} - virtual void set(S32 x, S32 y) { mX = x; mY = y; } -}; + LLCoord(const LLCoordCommon& other) + { + COORD_FRAME::convertFromCommon(other); + } + LLCoordCommon convert() const + { + return COORD_FRAME::convertToCommon(); + } -// GL coordinates start in the client region of a window, -// with left, bottom = 0, 0 -class LLCoordGL : public LLCoord -{ -public: - LLCoordGL() : LLCoord() - {} - LLCoordGL(S32 x, S32 y) : LLCoord(x, y) - {} - bool operator==(const LLCoordGL& other) const { return mX == other.mX && mY == other.mY; } - bool operator!=(const LLCoordGL& other) const { return !(*this == other); } -}; + void set(typename COORD_FRAME::value_t x, typename COORD_FRAME::value_t y) { mX = x; mY = y;} + bool operator==(const self_t& other) const { return mX == other.mX && mY == other.mY; } + bool operator!=(const self_t& other) const { return !(*this == other); } -//bool operator ==(const LLCoordGL& a, const LLCoordGL& b); + static const self_t& getTypedCoords(const COORD_FRAME& self) { return static_cast<const self_t&>(self); } + static self_t& getTypedCoords(COORD_FRAME& self) { return static_cast<self_t&>(self); } +}; -// Window coords include things like window borders, -// menu regions, etc. -class LLCoordWindow : public LLCoord +struct LL_COORD_TYPE_GL { -public: - LLCoordWindow() : LLCoord() - {} - LLCoordWindow(S32 x, S32 y) : LLCoord(x, y) - {} - bool operator==(const LLCoordWindow& other) const { return mX == other.mX && mY == other.mY; } - bool operator!=(const LLCoordWindow& other) const { return !(*this == other); } -}; + typedef S32 value_t; + LLCoordCommon convertToCommon() const + { + const LLCoordGL& self = LLCoordGL::getTypedCoords(*this); + return LLCoordCommon(self.mX, self.mY); + } -// Screen coords start at left, top = 0, 0 -class LLCoordScreen : public LLCoord + void convertFromCommon(const LLCoordCommon& from) + { + LLCoordGL& self = LLCoordGL::getTypedCoords(*this); + self.mX = from.mX; + self.mY = from.mY; + } +}; + +struct LL_COORD_TYPE_WINDOW { -public: - LLCoordScreen() : LLCoord() - {} - LLCoordScreen(S32 x, S32 y) : LLCoord(x, y) - {} - bool operator==(const LLCoordScreen& other) const { return mX == other.mX && mY == other.mY; } - bool operator!=(const LLCoordScreen& other) const { return !(*this == other); } + typedef S32 value_t; + + LLCoordCommon convertToCommon() const; + void convertFromCommon(const LLCoordCommon& from); }; -class LLCoordFont : public LLCoord +struct LL_COORD_TYPE_SCREEN { -public: - F32 mZ; - - LLCoordFont() : LLCoord(), mZ(0.f) - {} - LLCoordFont(S32 x, S32 y, F32 z = 0) : LLCoord(x,y), mZ(z) - {} - - void set(S32 x, S32 y) { LLCoord::set(x,y); mZ = 0.f; } - void set(S32 x, S32 y, F32 z) { mX = x; mY = y; mZ = z; } - bool operator==(const LLCoordFont& other) const { return mX == other.mX && mY == other.mY; } - bool operator!=(const LLCoordFont& other) const { return !(*this == other); } + typedef S32 value_t; + + LLCoordCommon convertToCommon() const; + void convertFromCommon(const LLCoordCommon& from); }; - #endif diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h index 9297bcbac2..b93f89d674 100644 --- a/indra/llmath/llmath.h +++ b/indra/llmath/llmath.h @@ -85,7 +85,7 @@ const F32 F_ALMOST_ONE = 1.0f - F_ALMOST_ZERO; const F32 FP_MAG_THRESHOLD = 0.0000001f; // TODO: Replace with logic like is_approx_equal -inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); } +inline bool is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); } // These functions work by interpreting sign+exp+mantissa as an unsigned // integer. @@ -111,13 +111,13 @@ inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < // WARNING: Infinity is comparable with F32_MAX and negative // infinity is comparable with F32_MIN -inline BOOL is_approx_equal(F32 x, F32 y) +inline bool is_approx_equal(F32 x, F32 y) { const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02; return (std::abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT); } -inline BOOL is_approx_equal(F64 x, F64 y) +inline bool is_approx_equal(F64 x, F64 y) { const S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02; return (std::abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT); diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index adb7e3389d..9916cfd2da 100644 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public: protected: - LLVector4a mColumns[3]; + LL_ALIGN_16(LLVector4a mColumns[3]); }; diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 27cf5b79f6..c4cefdb4fa 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@ class LLMatrix4a { public: - LLVector4a mMatrix[4]; + LL_ALIGN_16(LLVector4a mMatrix[4]); inline void clear() { diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 3c1ae45d68..7348904c61 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -31,7 +31,6 @@ #include "v3math.h" #include "llvector4a.h" #include <vector> -#include <set> #define OCT_ERRS LL_WARNS("OctreeErrors") @@ -79,16 +78,18 @@ public: typedef LLOctreeTraveler<T> oct_traveler; typedef LLTreeTraveler<T> tree_traveler; - typedef typename std::set<LLPointer<T> > element_list; - typedef typename std::set<LLPointer<T> >::iterator element_iter; - typedef typename std::set<LLPointer<T> >::const_iterator const_element_iter; + typedef std::vector< LLPointer<T> > element_list; // note: don't remove the whitespace between "> >" + typedef LLPointer<T>* element_iter; + typedef const LLPointer<T>* const_element_iter; typedef typename std::vector<LLTreeListener<T>*>::iterator tree_listener_iter; - typedef typename std::vector<LLOctreeNode<T>* > child_list; + typedef LLOctreeNode<T>** child_list; + typedef LLOctreeNode<T>** child_iter; + typedef LLTreeNode<T> BaseType; typedef LLOctreeNode<T> oct_node; typedef LLOctreeListener<T> oct_listener; - /*void* operator new(size_t size) + void* operator new(size_t size) { return ll_aligned_malloc_16(size); } @@ -96,7 +97,7 @@ public: void operator delete(void* ptr) { ll_aligned_free_16(ptr); - }*/ + } LLOctreeNode( const LLVector4a& center, const LLVector4a& size, @@ -105,6 +106,10 @@ public: : mParent((oct_node*)parent), mOctant(octant) { + //always keep a NULL terminated list to avoid out of bounds exceptions in debug builds + mData.push_back(NULL); + mDataEnd = &mData[0]; + mCenter = center; mSize = size; @@ -114,6 +119,8 @@ public: mOctant = ((oct_node*) mParent)->getOctant(mCenter); } + mElementCount = 0; + clearChildren(); } @@ -121,6 +128,16 @@ public: { BaseType::destroyListeners(); + for (U32 i = 0; i < mElementCount; ++i) + { + mData[i]->setBinIndex(-1); + mData[i] = NULL; + } + + mData.clear(); + mData.push_back(NULL); + mDataEnd = &mData[0]; + for (U32 i = 0; i < getChildCount(); i++) { delete getChild(i); @@ -217,13 +234,18 @@ public: } void accept(oct_traveler* visitor) { visitor->visit(this); } - virtual bool isLeaf() const { return mChild.empty(); } + virtual bool isLeaf() const { return mChildCount == 0; } - U32 getElementCount() const { return mData.size(); } + U32 getElementCount() const { return mElementCount; } + bool isEmpty() const { return mElementCount == 0; } element_list& getData() { return mData; } const element_list& getData() const { return mData; } - - U32 getChildCount() const { return mChild.size(); } + element_iter getDataBegin() { return &mData[0]; } + element_iter getDataEnd() { return mDataEnd; } + const_element_iter getDataBegin() const { return &mData[0]; } + const_element_iter getDataEnd() const { return mDataEnd; } + + U32 getChildCount() const { return mChildCount; } oct_node* getChild(U32 index) { return mChild[index]; } const oct_node* getChild(U32 index) const { return mChild[index]; } child_list& getChildren() { return mChild; } @@ -287,7 +309,7 @@ public: virtual bool insert(T* data) { - if (data == NULL) + if (data == NULL || data->getBinIndex() != -1) { OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl; return false; @@ -300,16 +322,11 @@ public: if ((getElementCount() < gOctreeMaxCapacity && contains(data->getBinRadius()) || (data->getBinRadius() > getSize()[0] && parent && parent->getElementCount() >= gOctreeMaxCapacity))) { //it belongs here -#if LL_OCTREE_PARANOIA_CHECK - //if this is a redundant insertion, error out (should never happen) - if (mData.find(data) != mData.end()) - { - llwarns << "Redundant octree insertion detected. " << data << llendl; - return false; - } -#endif - - mData.insert(data); + mData.push_back(NULL); + mData[mElementCount] = data; + mElementCount++; + mDataEnd = &mData[mElementCount]; + data->setBinIndex(mElementCount-1); BaseType::insert(data); return true; } @@ -344,7 +361,11 @@ public: if( lt == 0x7 ) { - mData.insert(data); + mData.push_back(NULL); + mData[mElementCount] = data; + mElementCount++; + mDataEnd = &mData[mElementCount]; + data->setBinIndex(mElementCount-1); BaseType::insert(data); return true; } @@ -394,22 +415,58 @@ public: return false; } + void _remove(T* data, S32 i) + { //precondition -- mElementCount > 0, idx is in range [0, mElementCount) + + mElementCount--; + data->setBinIndex(-1); + + if (mElementCount > 0) + { + if (mElementCount != i) + { + mData[i] = mData[mElementCount]; //might unref data, do not access data after this point + mData[i]->setBinIndex(i); + } + + mData[mElementCount] = NULL; + mData.pop_back(); + mDataEnd = &mData[mElementCount]; + } + else + { + mData.clear(); + mData.push_back(NULL); + mDataEnd = &mData[0]; + } + + notifyRemoval(data); + checkAlive(); + } + bool remove(T* data) { - if (mData.find(data) != mData.end()) - { //we have data - mData.erase(data); - notifyRemoval(data); - checkAlive(); - return true; - } - else if (isInside(data)) + S32 i = data->getBinIndex(); + + if (i >= 0 && i < mElementCount) + { + if (mData[i] == data) + { //found it + _remove(data, i); + llassert(data->getBinIndex() == -1); + return true; + } + } + + if (isInside(data)) { oct_node* dest = getNodeAt(data); if (dest != this) { - return dest->remove(data); + bool ret = dest->remove(data); + llassert(data->getBinIndex() == -1); + return ret; } } @@ -426,20 +483,22 @@ public: } //node is now root - llwarns << "!!! OCTREE REMOVING FACE BY ADDRESS, SEVERE PERFORMANCE PENALTY |||" << llendl; + llwarns << "!!! OCTREE REMOVING ELEMENT BY ADDRESS, SEVERE PERFORMANCE PENALTY |||" << llendl; node->removeByAddress(data); + llassert(data->getBinIndex() == -1); return true; } void removeByAddress(T* data) { - if (mData.find(data) != mData.end()) + for (U32 i = 0; i < mElementCount; ++i) { - mData.erase(data); - notifyRemoval(data); - llwarns << "FOUND!" << llendl; - checkAlive(); - return; + if (mData[i] == data) + { //we have data + _remove(data, i); + llwarns << "FOUND!" << llendl; + return; + } } for (U32 i = 0; i < getChildCount(); i++) @@ -451,7 +510,7 @@ public: void clearChildren() { - mChild.clear(); + mChildCount = 0; U32* foo = (U32*) mChildMap; foo[0] = foo[1] = 0xFFFFFFFF; @@ -512,9 +571,10 @@ public: } #endif - mChildMap[child->getOctant()] = (U8) mChild.size(); + mChildMap[child->getOctant()] = mChildCount; - mChild.push_back(child); + mChild[mChildCount] = child; + ++mChildCount; child->setParent(this); if (!silent) @@ -534,21 +594,23 @@ public: oct_listener* listener = getOctListener(i); listener->handleChildRemoval(this, getChild(index)); } - - if (destroy) { mChild[index]->destroy(); delete mChild[index]; } - mChild.erase(mChild.begin() + index); + + --mChildCount; + + mChild[index] = mChild[mChildCount]; + //rebuild child map U32* foo = (U32*) mChildMap; foo[0] = foo[1] = 0xFFFFFFFF; - for (U32 i = 0; i < mChild.size(); ++i) + for (U32 i = 0; i < mChildCount; ++i) { mChildMap[mChild[i]->getOctant()] = i; } @@ -599,10 +661,13 @@ protected: oct_node* mParent; U8 mOctant; - child_list mChild; + LLOctreeNode<T>* mChild[8]; U8 mChildMap[8]; + U32 mChildCount; element_list mData; + element_iter mDataEnd; + U32 mElementCount; }; diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index a611894721..3c32441b11 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,6 +36,8 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) + +LL_ALIGN_PREFIX(16) class LLPlane { public: @@ -94,7 +96,7 @@ public: private: LLVector4a mV; -}; +} LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index c7cdf7b32c..01458521ec 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -67,11 +67,10 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - - #include <xmmintrin.h> #include <emmintrin.h> +#include "llmemory.h" #include "llsimdtypes.h" #include "llsimdtypes.inl" diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl index 712239e425..e905c84954 100644 --- a/indra/llmath/llsimdtypes.inl +++ b/indra/llmath/llsimdtypes.inl @@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) inline LLSimdScalar operator-(const LLSimdScalar& a) { static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + ll_assert_aligned(signMask,16); return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a); } @@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) inline LLSimdScalar LLSimdScalar::getAbs() const { static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + ll_assert_aligned(F_ABS_MASK_4A,16); return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); } diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b66b7a7076..6edeb0fefe 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,6 +24,7 @@ * $/LicenseInfo$ */ +#include "llmemory.h" #include "llmath.h" #include "llquantize.h" @@ -44,7 +45,10 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F assert(dst != NULL); assert(bytes > 0); assert((bytes % sizeof(F32))== 0); - + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert(bytes%16==0); + F32* end = dst + (bytes / sizeof(F32) ); if (bytes > 64) @@ -189,6 +193,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) LLVector4a oneOverDelta; { static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; + ll_assert_aligned(F_TWO_4A,16); + LLVector4a two; two.load4a( F_TWO_4A ); // Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509d..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation; #include <assert.h> #include "llpreprocessor.h" +#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -46,6 +47,7 @@ class LLRotation; // LLVector3/LLVector4. ///////////////////////////////// +LL_ALIGN_PREFIX(16) class LLVector4a { public: @@ -82,6 +84,7 @@ public: } // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. + // Source and dest must be 16-byte aligned and size must be multiple of 16. static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); //////////////////////////////////// @@ -90,6 +93,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary + ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) @@ -313,7 +317,7 @@ public: private: LLQuad mQ; -}; +} LL_ALIGN_POSTFIX(16); inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7ad22a5631..7c52ffef21 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -475,6 +475,7 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3 inline LLBool32 LLVector4a::isFinite3() const { static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + ll_assert_aligned(nanOrInfMask,16); const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask); const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index dd66b09d43..c5698f7cea 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,6 +27,7 @@ #ifndef LL_VECTOR4LOGICAL_H #define LL_VECTOR4LOGICAL_H +#include "llmemory.h" //////////////////////////// // LLVector4Logical @@ -77,6 +78,7 @@ public: inline LLVector4Logical& invert() { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + ll_assert_aligned(allOnes,16); mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); return *this; } diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index da0fa32963..02c8d2b86f 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -35,7 +35,6 @@ #include <cmath> #include "llerror.h" -#include "llmemtype.h" #include "llvolumemgr.h" #include "v2math.h" @@ -95,17 +94,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; -void assert_aligned(void* ptr, uintptr_t alignment) -{ -#if 0 - uintptr_t t = (uintptr_t) ptr; - if (t%alignment != 0) - { - llerrs << "Alignment check failed." << llendl; - } -#endif -} - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -328,16 +316,16 @@ public: LLVector4a& min = node->mExtents[0]; LLVector4a& max = node->mExtents[1]; - if (!branch->getData().empty()) + if (!branch->isEmpty()) { //node has data, find AABB that binds data set - const LLVolumeTriangle* tri = *(branch->getData().begin()); + const LLVolumeTriangle* tri = *(branch->getDataBegin()); //initialize min/max to first available vertex min = *(tri->mV[0]); max = *(tri->mV[0]); for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = - branch->getData().begin(); iter != branch->getData().end(); ++iter) + branch->getDataBegin(); iter != branch->getDataEnd(); ++iter) { //for each triangle in node //stretch by triangles in node @@ -352,7 +340,7 @@ public: max.setMax(max, *tri->mV[2]); } } - else if (!branch->getChildren().empty()) + else if (!branch->isLeaf()) { //no data, but child nodes exist LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0); @@ -389,8 +377,6 @@ public: LLProfile::Face* LLProfile::addCap(S16 faceID) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - Face *face = vector_append(mFaces, 1); face->mIndex = 0; @@ -403,8 +389,6 @@ LLProfile::Face* LLProfile::addCap(S16 faceID) LLProfile::Face* LLProfile::addFace(S32 i, S32 count, F32 scaleU, S16 faceID, BOOL flat) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - Face *face = vector_append(mFaces, 1); face->mIndex = i; @@ -420,7 +404,6 @@ LLProfile::Face* LLProfile::addFace(S32 i, S32 count, F32 scaleU, S16 faceID, BO //static S32 LLProfile::getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset, F32 bevel, F32 ang_scale, S32 split) { // this is basically LLProfile::genNGon stripped down to only the operations that influence the number of points - LLMemType m1(LLMemType::MTYPE_VOLUME); S32 np = 0; // Generate an n-sided "circular" path. @@ -486,8 +469,6 @@ S32 LLProfile::getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 of // filleted and chamfered corners void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F32 bevel, F32 ang_scale, S32 split) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - // Generate an n-sided "circular" path. // 0 is (1,0), and we go counter-clockwise along a circular path from there. const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f }; @@ -741,8 +722,6 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3 S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split, BOOL is_sculpted, S32 sculpt_size) { // this is basically LLProfile::generate stripped down to only operations that influence the number of points - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (detail < MIN_LOD) { detail = MIN_LOD; @@ -853,8 +832,6 @@ S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 de BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split, BOOL is_sculpted, S32 sculpt_size) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if ((!mDirty) && (!is_sculpted)) { return FALSE; @@ -1127,8 +1104,6 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai BOOL LLProfileParams::importFile(LLFILE *fp) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ // *NOTE: changing the size or type of these buffers will require @@ -1204,8 +1179,6 @@ BOOL LLProfileParams::exportFile(LLFILE *fp) const BOOL LLProfileParams::importLegacyStream(std::istream& input_stream) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ // *NOTE: changing the size or type of these buffers will require @@ -1297,7 +1270,6 @@ bool LLProfileParams::fromLLSD(LLSD& sd) void LLProfileParams::copyParams(const LLProfileParams ¶ms) { - LLMemType m1(LLMemType::MTYPE_VOLUME); setCurveType(params.getCurveType()); setBegin(params.getBegin()); setEnd(params.getEnd()); @@ -1514,8 +1486,6 @@ const LLVector2 LLPathParams::getEndScale() const S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail) { // this is basically LLPath::generate stripped down to only the operations that influence the number of points - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (detail < MIN_LOD) { detail = MIN_LOD; @@ -1565,8 +1535,6 @@ S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail) BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, BOOL is_sculpted, S32 sculpt_size) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if ((!mDirty) && (!is_sculpted)) { return FALSE; @@ -1694,8 +1662,6 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, BOOL LLDynamicPath::generate(const LLPathParams& params, F32 detail, S32 split, BOOL is_sculpted, S32 sculpt_size) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - mOpen = TRUE; // Draw end caps if (getPathLength() == 0) { @@ -1717,8 +1683,6 @@ BOOL LLDynamicPath::generate(const LLPathParams& params, F32 detail, S32 split, BOOL LLPathParams::importFile(LLFILE *fp) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ // *NOTE: changing the size or type of these buffers will require @@ -1863,8 +1827,6 @@ BOOL LLPathParams::exportFile(LLFILE *fp) const BOOL LLPathParams::importLegacyStream(std::istream& input_stream) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ // *NOTE: changing the size or type of these buffers will require @@ -2072,12 +2034,11 @@ S32 LLVolume::sNumMeshPoints = 0; LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL generate_single_face, const BOOL is_unique) : mParams(params) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - mUnique = is_unique; mFaceMask = 0x0; mDetail = detail; mSculptLevel = -2; + mSurfaceArea = 1.f; //only calculated for sculpts, defaults to 1 for all other prims mIsMeshAssetLoaded = FALSE; mLODScaleBias.setVec(1,1,1); mHullPoints = NULL; @@ -2144,7 +2105,6 @@ LLVolume::~LLVolume() BOOL LLVolume::generate() { - LLMemType m1(LLMemType::MTYPE_VOLUME); llassert_always(mProfilep); //Added 10.03.05 Dave Parks @@ -2740,8 +2700,6 @@ S32 LLVolume::getNumFaces() const void LLVolume::createVolumeFaces() { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (mGenerateSingleFace) { // do nothing @@ -2903,7 +2861,7 @@ F32 LLVolume::sculptGetSurfaceArea() // compute the area of the quad by taking the length of the cross product of the two triangles LLVector3 cross1 = (p1 - p2) % (p1 - p3); LLVector3 cross2 = (p4 - p2) % (p4 - p3); - area += (cross1.magVec() + cross2.magVec()) / 2.0; + area += (cross1.magVec() + cross2.magVec()) / 2.f; } } @@ -2913,8 +2871,6 @@ F32 LLVolume::sculptGetSurfaceArea() // create placeholder shape void LLVolume::sculptGeneratePlaceholder() { - LLMemType m1(LLMemType::MTYPE_VOLUME); - S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); @@ -2951,9 +2907,6 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 BOOL sculpt_mirror = sculpt_type & LL_SCULPT_FLAG_MIRROR; BOOL reverse_horizontal = (sculpt_invert ? !sculpt_mirror : sculpt_mirror); // XOR - - LLMemType m1(LLMemType::MTYPE_VOLUME); - S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); @@ -3102,7 +3055,6 @@ void sculpt_calc_mesh_resolution(U16 width, U16 height, U8 type, F32 detail, S32 // sculpt replaces generate() for sculpted surfaces void LLVolume::sculpt(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data, S32 sculpt_level) { - LLMemType m1(LLMemType::MTYPE_VOLUME); U8 sculpt_type = mParams.getSculptType(); BOOL data_is_empty = FALSE; @@ -3144,6 +3096,8 @@ void LLVolume::sculpt(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, { F32 area = sculptGetSurfaceArea(); + mSurfaceArea = area; + const F32 SCULPT_MAX_AREA = 384.f; if (area < SCULPT_MIN_AREA || area > SCULPT_MAX_AREA) @@ -3237,7 +3191,6 @@ bool LLVolumeParams::operator<(const LLVolumeParams ¶ms) const void LLVolumeParams::copyParams(const LLVolumeParams ¶ms) { - LLMemType m1(LLMemType::MTYPE_VOLUME); mProfileParams.copyParams(params.mProfileParams); mPathParams.copyParams(params.mPathParams); mSculptID = params.getSculptID(); @@ -3609,8 +3562,6 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h S32 *LLVolume::getTriangleIndices(U32 &num_indices) const { - LLMemType m1(LLMemType::MTYPE_VOLUME); - S32 expected_num_triangle_indices = getNumTriangleIndices(); if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES) { @@ -4338,8 +4289,6 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, const LLMatrix3& norm_mat_in, S32 face_mask) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - LLMatrix4a mat; mat.loadu(mat_in); @@ -4617,18 +4566,83 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en genBinormals(i); } - if (!face.mOctree) - { - face.createOctree(); - } - - //LLVector4a* p = (LLVector4a*) face.mPositions; + if (isUnique()) + { //don't bother with an octree for flexi volumes + U32 tri_count = face.mNumIndices/3; + + for (U32 j = 0; j < tri_count; ++j) + { + U16 idx0 = face.mIndices[j*3+0]; + U16 idx1 = face.mIndices[j*3+1]; + U16 idx2 = face.mIndices[j*3+2]; + + const LLVector4a& v0 = face.mPositions[idx0]; + const LLVector4a& v1 = face.mPositions[idx1]; + const LLVector4a& v2 = face.mPositions[idx2]; + + F32 a,b,t; - LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal); - intersect.traverse(face.mOctree); - if (intersect.mHitFace) + if (LLTriangleRayIntersect(v0, v1, v2, + start, dir, a, b, t)) + { + if ((t >= 0.f) && // if hit is after start + (t <= 1.f) && // and before end + (t < closest_t)) // and this hit is closer + { + closest_t = t; + hit_face = i; + + if (intersection != NULL) + { + LLVector4a intersect = dir; + intersect.mul(closest_t); + intersect.add(start); + intersection->set(intersect.getF32ptr()); + } + + + if (tex_coord != NULL) + { + LLVector2* tc = (LLVector2*) face.mTexCoords; + *tex_coord = ((1.f - a - b) * tc[idx0] + + a * tc[idx1] + + b * tc[idx2]); + + } + + if (normal!= NULL) + { + LLVector4* norm = (LLVector4*) face.mNormals; + + *normal = ((1.f - a - b) * LLVector3(norm[idx0]) + + a * LLVector3(norm[idx1]) + + b * LLVector3(norm[idx2])); + } + + if (bi_normal != NULL) + { + LLVector4* binormal = (LLVector4*) face.mBinormals; + *bi_normal = ((1.f - a - b) * LLVector3(binormal[idx0]) + + a * LLVector3(binormal[idx1]) + + b * LLVector3(binormal[idx2])); + } + } + } + } + } + else { - hit_face = i; + if (!face.mOctree) + { + face.createOctree(); + } + + LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal); + intersect.traverse(face.mOctree); + if (intersect.mHitFace) + { + hit_face = i; + } } } } @@ -4736,241 +4750,8 @@ BOOL equalTriangle(const S32 *a, const S32 *b) return FALSE; } -BOOL LLVolume::cleanupTriangleData( const S32 num_input_vertices, - const std::vector<Point>& input_vertices, - const S32 num_input_triangles, - S32 *input_triangles, - S32 &num_output_vertices, - LLVector3 **output_vertices, - S32 &num_output_triangles, - S32 **output_triangles) -{ - LLMemType m1(LLMemType::MTYPE_VOLUME); - - /* Testing: avoid any cleanup - static BOOL skip_cleanup = TRUE; - if ( skip_cleanup ) - { - num_output_vertices = num_input_vertices; - num_output_triangles = num_input_triangles; - - *output_vertices = new LLVector3[num_input_vertices]; - for (S32 index = 0; index < num_input_vertices; index++) - { - (*output_vertices)[index] = input_vertices[index].mPos; - } - - *output_triangles = new S32[num_input_triangles*3]; - memcpy(*output_triangles, input_triangles, 3*num_input_triangles*sizeof(S32)); // Flawfinder: ignore - return TRUE; - } - */ - - // Here's how we do this: - // Create a structure which contains the original vertex index and the - // LLVector3 data. - // "Sort" the data by the vectors - // Create an array the size of the old vertex list, with a mapping of - // old indices to new indices. - // Go through triangles, shift so the lowest index is first - // Sort triangles by first index - // Remove duplicate triangles - // Allocate and pack new triangle data. - - //LLTimer cleanupTimer; - //llinfos << "In vertices: " << num_input_vertices << llendl; - //llinfos << "In triangles: " << num_input_triangles << llendl; - - S32 i; - typedef std::multiset<LLVertexIndexPair*, lessVertex> vertex_set_t; - vertex_set_t vertex_list; - - LLVertexIndexPair *pairp = NULL; - for (i = 0; i < num_input_vertices; i++) - { - LLVertexIndexPair *new_pairp = new LLVertexIndexPair(input_vertices[i].mPos, i); - vertex_list.insert(new_pairp); - } - - // Generate the vertex mapping and the list of vertices without - // duplicates. This will crash if there are no vertices. - llassert(num_input_vertices > 0); // check for no vertices! - S32 *vertex_mapping = new S32[num_input_vertices]; - LLVector3 *new_vertices = new LLVector3[num_input_vertices]; - LLVertexIndexPair *prev_pairp = NULL; - - S32 new_num_vertices; - - new_num_vertices = 0; - for (vertex_set_t::iterator iter = vertex_list.begin(), - end = vertex_list.end(); - iter != end; iter++) - { - pairp = *iter; - if (!prev_pairp || ((pairp->mVertex - prev_pairp->mVertex).magVecSquared() >= VERTEX_SLOP_SQRD)) - { - new_vertices[new_num_vertices] = pairp->mVertex; - //llinfos << "Added vertex " << new_num_vertices << " : " << pairp->mVertex << llendl; - new_num_vertices++; - // Update the previous - prev_pairp = pairp; - } - else - { - //llinfos << "Removed duplicate vertex " << pairp->mVertex << ", distance magVecSquared() is " << (pairp->mVertex - prev_pairp->mVertex).magVecSquared() << llendl; - } - vertex_mapping[pairp->mIndex] = new_num_vertices - 1; - } - - // Iterate through triangles and remove degenerates, re-ordering vertices - // along the way. - S32 *new_triangles = new S32[num_input_triangles * 3]; - S32 new_num_triangles = 0; - - for (i = 0; i < num_input_triangles; i++) - { - S32 v1 = i*3; - S32 v2 = v1 + 1; - S32 v3 = v1 + 2; - - //llinfos << "Checking triangle " << input_triangles[v1] << ":" << input_triangles[v2] << ":" << input_triangles[v3] << llendl; - input_triangles[v1] = vertex_mapping[input_triangles[v1]]; - input_triangles[v2] = vertex_mapping[input_triangles[v2]]; - input_triangles[v3] = vertex_mapping[input_triangles[v3]]; - - if ((input_triangles[v1] == input_triangles[v2]) - || (input_triangles[v1] == input_triangles[v3]) - || (input_triangles[v2] == input_triangles[v3])) - { - //llinfos << "Removing degenerate triangle " << input_triangles[v1] << ":" << input_triangles[v2] << ":" << input_triangles[v3] << llendl; - // Degenerate triangle, skip - continue; - } - - if (input_triangles[v1] < input_triangles[v2]) - { - if (input_triangles[v1] < input_triangles[v3]) - { - // (0 < 1) && (0 < 2) - new_triangles[new_num_triangles*3] = input_triangles[v1]; - new_triangles[new_num_triangles*3+1] = input_triangles[v2]; - new_triangles[new_num_triangles*3+2] = input_triangles[v3]; - } - else - { - // (0 < 1) && (2 < 0) - new_triangles[new_num_triangles*3] = input_triangles[v3]; - new_triangles[new_num_triangles*3+1] = input_triangles[v1]; - new_triangles[new_num_triangles*3+2] = input_triangles[v2]; - } - } - else if (input_triangles[v2] < input_triangles[v3]) - { - // (1 < 0) && (1 < 2) - new_triangles[new_num_triangles*3] = input_triangles[v2]; - new_triangles[new_num_triangles*3+1] = input_triangles[v3]; - new_triangles[new_num_triangles*3+2] = input_triangles[v1]; - } - else - { - // (1 < 0) && (2 < 1) - new_triangles[new_num_triangles*3] = input_triangles[v3]; - new_triangles[new_num_triangles*3+1] = input_triangles[v1]; - new_triangles[new_num_triangles*3+2] = input_triangles[v2]; - } - new_num_triangles++; - } - - if (new_num_triangles == 0) - { - llwarns << "Created volume object with 0 faces." << llendl; - delete[] new_triangles; - delete[] vertex_mapping; - delete[] new_vertices; - return FALSE; - } - - typedef std::set<S32*, lessTriangle> triangle_set_t; - triangle_set_t triangle_list; - - for (i = 0; i < new_num_triangles; i++) - { - triangle_list.insert(&new_triangles[i*3]); - } - - // Sort through the triangle list, and delete duplicates - - S32 *prevp = NULL; - S32 *curp = NULL; - - S32 *sorted_tris = new S32[new_num_triangles*3]; - S32 cur_tri = 0; - for (triangle_set_t::iterator iter = triangle_list.begin(), - end = triangle_list.end(); - iter != end; iter++) - { - curp = *iter; - if (!prevp || !equalTriangle(prevp, curp)) - { - //llinfos << "Added triangle " << *curp << ":" << *(curp+1) << ":" << *(curp+2) << llendl; - sorted_tris[cur_tri*3] = *curp; - sorted_tris[cur_tri*3+1] = *(curp+1); - sorted_tris[cur_tri*3+2] = *(curp+2); - cur_tri++; - prevp = curp; - } - else - { - //llinfos << "Skipped triangle " << *curp << ":" << *(curp+1) << ":" << *(curp+2) << llendl; - } - } - - *output_vertices = new LLVector3[new_num_vertices]; - num_output_vertices = new_num_vertices; - for (i = 0; i < new_num_vertices; i++) - { - (*output_vertices)[i] = new_vertices[i]; - } - - *output_triangles = new S32[cur_tri*3]; - num_output_triangles = cur_tri; - memcpy(*output_triangles, sorted_tris, 3*cur_tri*sizeof(S32)); /* Flawfinder: ignore */ - - /* - llinfos << "Out vertices: " << num_output_vertices << llendl; - llinfos << "Out triangles: " << num_output_triangles << llendl; - for (i = 0; i < num_output_vertices; i++) - { - llinfos << i << ":" << (*output_vertices)[i] << llendl; - } - for (i = 0; i < num_output_triangles; i++) - { - llinfos << i << ":" << (*output_triangles)[i*3] << ":" << (*output_triangles)[i*3+1] << ":" << (*output_triangles)[i*3+2] << llendl; - } - */ - - //llinfos << "Out vertices: " << num_output_vertices << llendl; - //llinfos << "Out triangles: " << num_output_triangles << llendl; - delete[] vertex_mapping; - vertex_mapping = NULL; - delete[] new_vertices; - new_vertices = NULL; - delete[] new_triangles; - new_triangles = NULL; - delete[] sorted_tris; - sorted_tris = NULL; - triangle_list.clear(); - std::for_each(vertex_list.begin(), vertex_list.end(), DeletePointer()); - vertex_list.clear(); - - return TRUE; -} - - BOOL LLVolumeParams::importFile(LLFILE *fp) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - //llinfos << "importing volume" << llendl; const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ @@ -5025,8 +4806,6 @@ BOOL LLVolumeParams::exportFile(LLFILE *fp) const BOOL LLVolumeParams::importLegacyStream(std::istream& input_stream) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - //llinfos << "importing volume" << llendl; const S32 BUFSIZE = 16384; // *NOTE: changing the size or type of this buffer will require @@ -5066,8 +4845,6 @@ BOOL LLVolumeParams::importLegacyStream(std::istream& input_stream) BOOL LLVolumeParams::exportLegacyStream(std::ostream& output_stream) const { - LLMemType m1(LLMemType::MTYPE_VOLUME); - output_stream <<"\tshape 0\n"; output_stream <<"\t{\n"; mPathParams.exportLegacyStream(output_stream); @@ -5822,7 +5599,7 @@ F32 find_vertex_score(LLVCacheVertexData& data) } //bonus points for having low valence - F32 valence_boost = powf(data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); score += FindVertexScore_ValenceBoostScale * valence_boost; return score; @@ -6283,8 +6060,6 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const std::vector<LLVolume::Point>& mesh = volume->getMesh(); const std::vector<LLVector3>& profile = volume->getProfile().mProfile; S32 max_s = volume->getProfile().getTotal(); @@ -6435,8 +6210,6 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK) && ((volume->getParams().getPathParams().getBegin()==0.0f)&& @@ -6823,8 +6596,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) void LLVolumeFace::createBinormals() { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (!mBinormals) { allocateBinormals(mNumVertices); @@ -6894,14 +6665,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts) if (num_verts) { mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mPositions, 16); + ll_assert_aligned(mPositions, 16); mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mNormals, 16); + ll_assert_aligned(mNormals, 16); //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - assert_aligned(mTexCoords, 16); + ll_assert_aligned(mTexCoords, 16); } else { @@ -6922,17 +6693,21 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con { S32 new_verts = mNumVertices+1; S32 new_size = new_verts*16; -// S32 old_size = mNumVertices*16; + S32 old_size = mNumVertices*16; //positions - mPositions = (LLVector4a*) realloc(mPositions, new_size); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); + ll_assert_aligned(mPositions,16); //normals - mNormals = (LLVector4a*) realloc(mNormals, new_size); - + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); + ll_assert_aligned(mNormals,16); + //tex coords new_size = ((new_verts*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); + old_size = ((mNumVertices*8)+0xF) & ~0xF; + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); + ll_assert_aligned(mTexCoords,16); //just clear binormals @@ -6985,7 +6760,8 @@ void LLVolumeFace::pushIndex(const U16& idx) S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; if (new_size != old_size) { - mIndices = (U16*) realloc(mIndices, new_size); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size, old_size); + ll_assert_aligned(mIndices,16); } mIndices[mNumIndices++] = idx; @@ -7026,12 +6802,12 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat } //allocate new buffer space - mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); - assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); - assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); - assert_aligned(mTexCoords, 16); + mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); + ll_assert_aligned(mPositions, 16); + mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); + ll_assert_aligned(mNormals, 16); + mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); + ll_assert_aligned(mTexCoords, 16); mNumVertices = new_count; @@ -7077,7 +6853,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat new_count = mNumIndices + face.mNumIndices; //allocate new index buffer - mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF, (mNumIndices*sizeof(U16)+0xF) & ~0xF); //get destination address into new index buffer U16* dst_idx = mIndices+mNumIndices; @@ -7091,8 +6867,6 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - BOOL flat = mTypeMask & FLAT_MASK; U8 sculpt_type = volume->getParams().getSculptType(); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index afd1ec5eed..c845556557 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -54,6 +54,7 @@ class LLVolumeTriangle; #include "llstrider.h" #include "v4coloru.h" #include "llrefcount.h" +#include "llpointer.h" #include "llfile.h" //============================================================================ @@ -919,6 +920,10 @@ public: LLVector2* mTexCoords; U16* mIndices; + //vertex buffer filled in by LLFace to cache this volume face geometry in vram + // (declared as a LLPointer to LLRefCount to avoid dependency on LLVertexBuffer) + mutable LLPointer<LLRefCount> mVertexBuffer; + std::vector<S32> mEdge; //list of skin weights for rigged volumes @@ -963,6 +968,7 @@ public: S32 getNumFaces() const; S32 getNumVolumeFaces() const { return mVolumeFaces.size(); } F32 getDetail() const { return mDetail; } + F32 getSurfaceArea() const { return mSurfaceArea; } const LLVolumeParams& getParams() const { return mParams; } LLVolumeParams getCopyOfParams() const { return mParams; } const LLProfile& getProfile() const { return *mProfilep; } @@ -1017,17 +1023,6 @@ public: LLVector3* normal = NULL, LLVector3* bi_normal = NULL); - // The following cleans up vertices and triangles, - // getting rid of degenerate triangles and duplicate vertices, - // and allocates new arrays with the clean data. - static BOOL cleanupTriangleData( const S32 num_input_vertices, - const std::vector<Point> &input_vertices, - const S32 num_input_triangles, - S32 *input_triangles, - S32 &num_output_vertices, - LLVector3 **output_vertices, - S32 &num_output_triangles, - S32 **output_triangles); LLFaceID generateFaceMask(); BOOL isFaceMaskValid(LLFaceID face_mask); @@ -1065,6 +1060,7 @@ public: BOOL mUnique; F32 mDetail; S32 mSculptLevel; + F32 mSurfaceArea; //unscaled surface area BOOL mIsMeshAssetLoaded; LLVolumeParams mParams; diff --git a/indra/llmath/llvolumemgr.cpp b/indra/llmath/llvolumemgr.cpp index c60b750088..9083273ee5 100644 --- a/indra/llmath/llvolumemgr.cpp +++ b/indra/llmath/llvolumemgr.cpp @@ -26,7 +26,6 @@ #include "linden_common.h" #include "llvolumemgr.h" -#include "llmemtype.h" #include "llvolume.h" @@ -182,7 +181,6 @@ void LLVolumeMgr::insertGroup(LLVolumeLODGroup* volgroup) // protected LLVolumeLODGroup* LLVolumeMgr::createNewGroup(const LLVolumeParams& volume_params) { - LLMemType m1(LLMemType::MTYPE_VOLUME); LLVolumeLODGroup* volgroup = new LLVolumeLODGroup(volume_params); insertGroup(volgroup); return volgroup; @@ -297,7 +295,6 @@ LLVolume* LLVolumeLODGroup::refLOD(const S32 detail) mRefs++; if (mVolumeLODs[detail].isNull()) { - LLMemType m1(LLMemType::MTYPE_VOLUME); mVolumeLODs[detail] = new LLVolume(mVolumeParams, mDetailScales[detail]); } mLODRefs[detail]++; diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp index b5a935c2b5..cc83cb7235 100644 --- a/indra/llmath/llvolumeoctree.cpp +++ b/indra/llmath/llvolumeoctree.cpp @@ -131,7 +131,7 @@ void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode<LLVolumeTriangle> void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode<LLVolumeTriangle>* node) { for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = - node->getData().begin(); iter != node->getData().end(); ++iter) + node->getDataBegin(); iter != node->getDataEnd(); ++iter) { const LLVolumeTriangle* tri = *iter; @@ -236,8 +236,8 @@ void LLVolumeOctreeValidate::visit(const LLOctreeNode<LLVolumeTriangle>* branch) } //children fit, check data - for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getData().begin(); - iter != branch->getData().end(); ++iter) + for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getDataBegin(); + iter != branch->getDataEnd(); ++iter) { const LLVolumeTriangle* tri = *iter; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 688d91dc40..9ae34a0c4e 100644 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -37,9 +37,19 @@ class LLVolumeTriangle : public LLRefCount { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeTriangle() { - + mBinIndex = -1; } LLVolumeTriangle(const LLVolumeTriangle& rhs) @@ -58,21 +68,38 @@ public: } - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mPositionGroup); const LLVector4a* mV[3]; U16 mIndex[3]; F32 mRadius; + mutable S32 mBinIndex; + virtual const LLVector4a& getPositionGroup() const; virtual const F32& getBinRadius() const; + + S32 getBinIndex() const { return mBinIndex; } + void setBinIndex(S32 idx) const { mBinIndex = idx; } + + }; class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle> { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node); ~LLVolumeOctreeListener(); @@ -99,8 +126,8 @@ public: public: - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children }; class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle> diff --git a/indra/llmath/m4math.cpp b/indra/llmath/m4math.cpp index bad4deb4de..6a1b4143cf 100644 --- a/indra/llmath/m4math.cpp +++ b/indra/llmath/m4math.cpp @@ -858,25 +858,25 @@ LLSD LLMatrix4::getValue() const void LLMatrix4::setValue(const LLSD& data) { - mMatrix[0][0] = data[0].asReal(); - mMatrix[0][1] = data[1].asReal(); - mMatrix[0][2] = data[2].asReal(); - mMatrix[0][3] = data[3].asReal(); - - mMatrix[1][0] = data[4].asReal(); - mMatrix[1][1] = data[5].asReal(); - mMatrix[1][2] = data[6].asReal(); - mMatrix[1][3] = data[7].asReal(); - - mMatrix[2][0] = data[8].asReal(); - mMatrix[2][1] = data[9].asReal(); - mMatrix[2][2] = data[10].asReal(); - mMatrix[2][3] = data[11].asReal(); - - mMatrix[3][0] = data[12].asReal(); - mMatrix[3][1] = data[13].asReal(); - mMatrix[3][2] = data[14].asReal(); - mMatrix[3][3] = data[15].asReal(); + mMatrix[0][0] = (F32)data[0].asReal(); + mMatrix[0][1] = (F32)data[1].asReal(); + mMatrix[0][2] = (F32)data[2].asReal(); + mMatrix[0][3] = (F32)data[3].asReal(); + + mMatrix[1][0] = (F32)data[4].asReal(); + mMatrix[1][1] = (F32)data[5].asReal(); + mMatrix[1][2] = (F32)data[6].asReal(); + mMatrix[1][3] = (F32)data[7].asReal(); + + mMatrix[2][0] = (F32)data[8].asReal(); + mMatrix[2][1] = (F32)data[9].asReal(); + mMatrix[2][2] = (F32)data[10].asReal(); + mMatrix[2][3] = (F32)data[11].asReal(); + + mMatrix[3][0] = (F32)data[12].asReal(); + mMatrix[3][1] = (F32)data[13].asReal(); + mMatrix[3][2] = (F32)data[14].asReal(); + mMatrix[3][3] = (F32)data[15].asReal(); } diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp new file mode 100644 index 0000000000..5ee3c45502 --- /dev/null +++ b/indra/llmath/tests/alignment_test.cpp @@ -0,0 +1,141 @@ +/** + * @file v3dmath_test.cpp + * @author Vir + * @date 2011-12 + * @brief v3dmath test cases. + * + * $LicenseInfo:firstyear=2011&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +// Tests related to allocating objects with alignment constraints, particularly for SSE support. + +#include "linden_common.h" +#include "../test/lltut.h" +#include "../llmath.h" +#include "../llsimdmath.h" +#include "../llvector4a.h" + +namespace tut +{ + +#define is_aligned(ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr))%(alignment)==0) +#define is_aligned_relative(ptr,base_ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr)-reinterpret_cast<uintptr_t>(base_ptr))%(alignment)==0) + +struct alignment_test {}; + +typedef test_group<alignment_test> alignment_test_t; +typedef alignment_test_t::object alignment_test_object_t; +tut::alignment_test_t tut_alignment_test("LLAlignment"); + +LL_ALIGN_PREFIX(16) +class MyVector4a +{ +public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void *p) + { + ll_aligned_free_16(p); + } + + void* operator new[](size_t count) + { // try to allocate count bytes for an array + return ll_aligned_malloc_16(count); + } + + void operator delete[](void *p) + { + ll_aligned_free_16(p); + } + + LLQuad mQ; +} LL_ALIGN_POSTFIX(16); + + +// Verify that aligned allocators perform as advertised. +template<> template<> +void alignment_test_object_t::test<1>() +{ +# ifdef LL_DEBUG +// skip("This test fails on Windows when compiled in debug mode."); +# endif + + const int num_tests = 7; + void *align_ptr; + for (int i=0; i<num_tests; i++) + { + align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a)); + ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16)); + + align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a), sizeof(MyVector4a)); + ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16)); + + ll_aligned_free_16(align_ptr); + + align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a)); + ensure("ll_aligned_malloc_32 failed", is_aligned(align_ptr,32)); + ll_aligned_free_32(align_ptr); + } +} + +// In-place allocation of objects and arrays. +template<> template<> +void alignment_test_object_t::test<2>() +{ + MyVector4a vec1; + ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16)); + + MyVector4a veca[12]; + ensure("LLAlignment veca unaligned", is_aligned(veca,16)); +} + +// Heap allocation of objects and arrays. +template<> template<> +void alignment_test_object_t::test<3>() +{ +# ifdef LL_DEBUG +// skip("This test fails on Windows when compiled in debug mode."); +# endif + + const int ARR_SIZE = 7; + for(int i=0; i<ARR_SIZE; i++) + { + MyVector4a *vecp = new MyVector4a; + ensure("LLAlignment vecp unaligned", is_aligned(vecp,16)); + delete vecp; + } + + MyVector4a *veca = new MyVector4a[ARR_SIZE]; + //std::cout << "veca base is " << (S32) veca << std::endl; + ensure("LLAligment veca base", is_aligned(veca,16)); + for(int i=0; i<ARR_SIZE; i++) + { + std::cout << "veca[" << i << "]" << std::endl; + ensure("LLAlignment veca member unaligned", is_aligned(&veca[i],16)); + } + delete [] veca; +} + +} diff --git a/indra/llmath/v3color.h b/indra/llmath/v3color.h index 56cb2ae73e..daf3a6857b 100644 --- a/indra/llmath/v3color.h +++ b/indra/llmath/v3color.h @@ -33,6 +33,7 @@ class LLVector4; #include "llerror.h" #include "llmath.h" #include "llsd.h" +#include <string.h> // LLColor3 = |r g b| |