diff options
Diffstat (limited to 'indra/llmath')
93 files changed, 1413 insertions, 1923 deletions
diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index b5e59c1ca3..0614fd92ef 100644..100755 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -7,6 +7,7 @@ include(LLCommon) include_directories( ${LLCOMMON_INCLUDE_DIRS} + ${LLCOMMON_SYSTEM_INCLUDE_DIRS} ) set(llmath_SOURCE_FILES @@ -99,6 +100,10 @@ list(APPEND llmath_SOURCE_FILES ${llmath_HEADER_FILES}) add_library (llmath ${llmath_SOURCE_FILES}) +target_link_libraries(llmath + ${LLCOMMON_LIBRARIES} + ) + # Add tests if (LL_TESTS) include(LLAddBuildTest) @@ -117,6 +122,7 @@ if (LL_TESTS) # INTEGRATION TESTS set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES}) # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests. + LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}") LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}") LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}") diff --git a/indra/llmath/camera.h b/indra/llmath/camera.h index 26f3c3d19f..26f3c3d19f 100644..100755 --- a/indra/llmath/camera.h +++ b/indra/llmath/camera.h diff --git a/indra/llmath/coordframe.h b/indra/llmath/coordframe.h index 271bcb433c..271bcb433c 100644..100755 --- a/indra/llmath/coordframe.h +++ b/indra/llmath/coordframe.h diff --git a/indra/llmath/llbbox.cpp b/indra/llmath/llbbox.cpp index 3e2c05a6e6..3e2c05a6e6 100644..100755 --- a/indra/llmath/llbbox.cpp +++ b/indra/llmath/llbbox.cpp diff --git a/indra/llmath/llbbox.h b/indra/llmath/llbbox.h index 28e69b75e1..28e69b75e1 100644..100755 --- a/indra/llmath/llbbox.h +++ b/indra/llmath/llbbox.h diff --git a/indra/llmath/llbboxlocal.cpp b/indra/llmath/llbboxlocal.cpp index bf0c1a7b93..bf0c1a7b93 100644..100755 --- a/indra/llmath/llbboxlocal.cpp +++ b/indra/llmath/llbboxlocal.cpp diff --git a/indra/llmath/llbboxlocal.h b/indra/llmath/llbboxlocal.h index defb899248..defb899248 100644..100755 --- 
a/indra/llmath/llbboxlocal.h +++ b/indra/llmath/llbboxlocal.h diff --git a/indra/llmath/llcalc.cpp b/indra/llmath/llcalc.cpp index 1b2d609b67..1b2d609b67 100644..100755 --- a/indra/llmath/llcalc.cpp +++ b/indra/llmath/llcalc.cpp diff --git a/indra/llmath/llcalc.h b/indra/llmath/llcalc.h index ceb9dce585..ceb9dce585 100644..100755 --- a/indra/llmath/llcalc.h +++ b/indra/llmath/llcalc.h diff --git a/indra/llmath/llcalcparser.cpp b/indra/llmath/llcalcparser.cpp index b4ca320659..b4ca320659 100644..100755 --- a/indra/llmath/llcalcparser.cpp +++ b/indra/llmath/llcalcparser.cpp diff --git a/indra/llmath/llcalcparser.h b/indra/llmath/llcalcparser.h index e0ad270266..e0ad270266 100644..100755 --- a/indra/llmath/llcalcparser.h +++ b/indra/llmath/llcalcparser.h diff --git a/indra/llmath/llcamera.cpp b/indra/llmath/llcamera.cpp index 22ba26f99b..33cf185196 100644..100755 --- a/indra/llmath/llcamera.cpp +++ b/indra/llmath/llcamera.cpp @@ -42,6 +42,11 @@ LLCamera::LLCamera() : mPlaneCount(6), mFrustumCornerDist(0.f) { + for (U32 i = 0; i < PLANE_MASK_NUM; i++) + { + mPlaneMask[i] = PLANE_MASK_NONE; + } + calculateFrustumPlanes(); } @@ -52,6 +57,11 @@ LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_p mPlaneCount(6), mFrustumCornerDist(0.f) { + for (U32 i = 0; i < PLANE_MASK_NUM; i++) + { + mPlaneMask[i] = PLANE_MASK_NONE; + } + mAspect = llclamp(aspect_ratio, MIN_ASPECT_RATIO, MAX_ASPECT_RATIO); mNearPlane = llclamp(near_plane, MIN_NEAR_PLANE, MAX_NEAR_PLANE); if(far_plane < 0) far_plane = DEFAULT_FAR_PLANE; @@ -87,14 +97,14 @@ F32 LLCamera::getMaxView() const void LLCamera::setUserClipPlane(LLPlane& plane) { - mPlaneCount = 7; - mAgentPlanes[6] = plane; - mPlaneMask[6] = plane.calcPlaneMask(); + mPlaneCount = AGENT_PLANE_USER_CLIP_NUM; + mAgentPlanes[AGENT_PLANE_USER_CLIP] = plane; + mPlaneMask[AGENT_PLANE_USER_CLIP] = plane.calcPlaneMask(); } void LLCamera::disableUserClipPlane() { - mPlaneCount = 6; + mPlaneCount = AGENT_PLANE_NO_USER_CLIP_NUM; 
} void LLCamera::setView(F32 vertical_fov_rads) @@ -161,31 +171,33 @@ size_t LLCamera::readFrustumFromBuffer(const char *buffer) // ---------------- test methods ---------------- -S32 LLCamera::AABBInFrustum(const LLVector4a &center, const LLVector4a& radius) +static const LLVector4a sFrustumScaler[] = { - static const LLVector4a scaler[] = { - LLVector4a(-1,-1,-1), - LLVector4a( 1,-1,-1), - LLVector4a(-1, 1,-1), - LLVector4a( 1, 1,-1), - LLVector4a(-1,-1, 1), - LLVector4a( 1,-1, 1), - LLVector4a(-1, 1, 1), - LLVector4a( 1, 1, 1) - }; + LLVector4a(-1,-1,-1), + LLVector4a( 1,-1,-1), + LLVector4a(-1, 1,-1), + LLVector4a( 1, 1,-1), + LLVector4a(-1,-1, 1), + LLVector4a( 1,-1, 1), + LLVector4a(-1, 1, 1), + LLVector4a( 1, 1, 1) // 8 entries +}; +S32 LLCamera::AABBInFrustum(const LLVector4a &center, const LLVector4a& radius) +{ U8 mask = 0; bool result = false; LLVector4a rscale, maxp, minp; LLSimdScalar d; - for (U32 i = 0; i < mPlaneCount; i++) + U32 max_planes = llmin(mPlaneCount, (U32) AGENT_PLANE_USER_CLIP_NUM); // mAgentPlanes[] size is 7 + for (U32 i = 0; i < max_planes; i++) { mask = mPlaneMask[i]; - if (mask != 0xff) + if (mask < PLANE_MASK_NUM) { const LLPlane& p(mAgentPlanes[i]); p.getAt<3>(d); - rscale.setMul(radius, scaler[mask]); + rscale.setMul(radius, sFrustumScaler[mask]); minp.setSub(center, rscale); d = -d; if (p.dot3(minp).getF32() > d) @@ -207,29 +219,19 @@ S32 LLCamera::AABBInFrustum(const LLVector4a &center, const LLVector4a& radius) S32 LLCamera::AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius) { - static const LLVector4a scaler[] = { - LLVector4a(-1,-1,-1), - LLVector4a( 1,-1,-1), - LLVector4a(-1, 1,-1), - LLVector4a( 1, 1,-1), - LLVector4a(-1,-1, 1), - LLVector4a( 1,-1, 1), - LLVector4a(-1, 1, 1), - LLVector4a( 1, 1, 1) - }; - U8 mask = 0; bool result = false; LLVector4a rscale, maxp, minp; LLSimdScalar d; - for (U32 i = 0; i < mPlaneCount; i++) + U32 max_planes = llmin(mPlaneCount, (U32) AGENT_PLANE_USER_CLIP_NUM); // 
mAgentPlanes[] size is 7 + for (U32 i = 0; i < max_planes; i++) { mask = mPlaneMask[i]; - if ((i != 5) && (mask != 0xff)) + if ((i != 5) && (mask < PLANE_MASK_NUM)) { const LLPlane& p(mAgentPlanes[i]); p.getAt<3>(d); - rscale.setMul(radius, scaler[mask]); + rscale.setMul(radius, sFrustumScaler[mask]); minp.setSub(center, rscale); d = -d; if (p.dot3(minp).getF32() > d) @@ -369,7 +371,7 @@ int LLCamera::sphereInFrustum(const LLVector3 &sphere_center, const F32 radius) bool res = false; for (int i = 0; i < 6; i++) { - if (mPlaneMask[i] != 0xff) + if (mPlaneMask[i] != PLANE_MASK_NONE) { float d = mAgentPlanes[i].dist(sphere_center); @@ -541,14 +543,14 @@ void LLCamera::ignoreAgentFrustumPlane(S32 idx) return; } - mPlaneMask[idx] = 0xff; + mPlaneMask[idx] = PLANE_MASK_NONE; mAgentPlanes[idx].clear(); } void LLCamera::calcAgentFrustumPlanes(LLVector3* frust) { - for (int i = 0; i < 8; i++) + for (int i = 0; i < AGENT_FRUSTRUM_NUM; i++) { mAgentFrustum[i] = frust[i]; } @@ -560,22 +562,22 @@ void LLCamera::calcAgentFrustumPlanes(LLVector3* frust) //order of planes is important, keep most likely to fail in the front of the list //near - frust[0], frust[1], frust[2] - mAgentPlanes[2] = planeFromPoints(frust[0], frust[1], frust[2]); + mAgentPlanes[AGENT_PLANE_NEAR] = planeFromPoints(frust[0], frust[1], frust[2]); //far - mAgentPlanes[5] = planeFromPoints(frust[5], frust[4], frust[6]); + mAgentPlanes[AGENT_PLANE_FAR] = planeFromPoints(frust[5], frust[4], frust[6]); //left - mAgentPlanes[0] = planeFromPoints(frust[4], frust[0], frust[7]); + mAgentPlanes[AGENT_PLANE_LEFT] = planeFromPoints(frust[4], frust[0], frust[7]); //right - mAgentPlanes[1] = planeFromPoints(frust[1], frust[5], frust[6]); + mAgentPlanes[AGENT_PLANE_RIGHT] = planeFromPoints(frust[1], frust[5], frust[6]); //top - mAgentPlanes[4] = planeFromPoints(frust[3], frust[2], frust[6]); + mAgentPlanes[AGENT_PLANE_TOP] = planeFromPoints(frust[3], frust[2], frust[6]); //bottom - mAgentPlanes[3] = 
planeFromPoints(frust[1], frust[0], frust[4]); + mAgentPlanes[AGENT_PLANE_BOTTOM] = planeFromPoints(frust[1], frust[0], frust[4]); //cache plane octant facing mask for use in AABBInFrustum for (U32 i = 0; i < mPlaneCount; i++) @@ -635,7 +637,7 @@ void LLCamera::calculateWorldFrustumPlanes() LLVector3 center = mOrigin - mXAxis*mNearPlane; mWorldPlanePos = center; LLVector3 pnorm; - for (int p=0; p<4; p++) + for (int p = 0; p < PLANE_NUM; p++) { mLocalPlanes[p].getVector3(pnorm); LLVector3 norm = rotateToAbsolute(pnorm); diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index ec67b91d05..1283cfb16b 100644..100755 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -60,7 +60,7 @@ static const F32 MAX_FIELD_OF_VIEW = 175.f * DEG_TO_RAD; // roll(), pitch(), yaw() // etc... - +LL_ALIGN_PREFIX(16) class LLCamera : public LLCoordFrame { @@ -76,26 +76,39 @@ public: PLANE_RIGHT = 1, PLANE_BOTTOM = 2, PLANE_TOP = 3, - PLANE_NUM = 4 + PLANE_NUM = 4, + PLANE_MASK_NONE = 0xff // Disable this plane }; enum { PLANE_LEFT_MASK = (1<<PLANE_LEFT), PLANE_RIGHT_MASK = (1<<PLANE_RIGHT), PLANE_BOTTOM_MASK = (1<<PLANE_BOTTOM), PLANE_TOP_MASK = (1<<PLANE_TOP), - PLANE_ALL_MASK = 0xf + PLANE_ALL_MASK = 0xf, }; enum - { + { // Indexes to mAgentPlanes[] and mPlaneMask[] AGENT_PLANE_LEFT = 0, - AGENT_PLANE_RIGHT, - AGENT_PLANE_NEAR, - AGENT_PLANE_BOTTOM, - AGENT_PLANE_TOP, - AGENT_PLANE_FAR, + AGENT_PLANE_RIGHT = 1, + AGENT_PLANE_NEAR = 2, + AGENT_PLANE_BOTTOM = 3, + AGENT_PLANE_TOP = 4, + AGENT_PLANE_FAR = 5, + AGENT_PLANE_USER_CLIP = 6 + }; + enum + { // Sizes for mAgentPlanes[]. 
7th entry is special case for user clip + AGENT_PLANE_NO_USER_CLIP_NUM = 6, + AGENT_PLANE_USER_CLIP_NUM = 7, + PLANE_MASK_NUM = 8 // 7 actually used, 8 is for alignment }; + enum + { + AGENT_FRUSTRUM_NUM = 8 + }; + enum { HORIZ_PLANE_LEFT = 0, HORIZ_PLANE_RIGHT = 1, @@ -108,27 +121,27 @@ public: }; private: - LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP - U8 mPlaneMask[8]; // 8 for alignment + LL_ALIGN_16(LLPlane mAgentPlanes[AGENT_PLANE_USER_CLIP_NUM]); //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + U8 mPlaneMask[PLANE_MASK_NUM]; // 8 for alignment F32 mView; // angle between top and bottom frustum planes in radians. F32 mAspect; // width/height S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - LLPlane mLocalPlanes[4]; + LL_ALIGN_16(LLPlane mLocalPlanes[PLANE_NUM]); F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LLPlane mWorldPlanes[PLANE_NUM]; - LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; + LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); + LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in LLVector3 mWorldPlanePos; // Position of World Planes (may be offset from camera) public: - LLVector3 mAgentFrustum[8]; //8 corners of 6-plane frustum + LLVector3 mAgentFrustum[AGENT_FRUSTRUM_NUM]; //8 corners of 6-plane frustum F32 mFrustumCornerDist; //distance to corner of frustum against far clip plane LLPlane& getAgentPlane(U32 idx) { return mAgentPlanes[idx]; } @@ -208,7 +221,7 @@ protected: void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); void calculateWorldFrustumPlanes(); -}; +} LL_ALIGN_POSTFIX(16); #endif diff 
--git a/indra/llmath/llcoord.h b/indra/llmath/llcoord.h index 9b76268afd..9b76268afd 100644..100755 --- a/indra/llmath/llcoord.h +++ b/indra/llmath/llcoord.h diff --git a/indra/llmath/llcoordframe.cpp b/indra/llmath/llcoordframe.cpp index 7dd8e43185..7dd8e43185 100644..100755 --- a/indra/llmath/llcoordframe.cpp +++ b/indra/llmath/llcoordframe.cpp diff --git a/indra/llmath/llcoordframe.h b/indra/llmath/llcoordframe.h index 909adf260c..909adf260c 100644..100755 --- a/indra/llmath/llcoordframe.h +++ b/indra/llmath/llcoordframe.h diff --git a/indra/llmath/llinterp.h b/indra/llmath/llinterp.h index 5187646179..5187646179 100644..100755 --- a/indra/llmath/llinterp.h +++ b/indra/llmath/llinterp.h diff --git a/indra/llmath/llline.cpp b/indra/llmath/llline.cpp index ef10d1e7fa..ef10d1e7fa 100644..100755 --- a/indra/llmath/llline.cpp +++ b/indra/llmath/llline.cpp diff --git a/indra/llmath/llline.h b/indra/llmath/llline.h index e1cbc1323e..e1cbc1323e 100644..100755 --- a/indra/llmath/llline.h +++ b/indra/llmath/llline.h diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h index 9297bcbac2..b93f89d674 100644..100755 --- a/indra/llmath/llmath.h +++ b/indra/llmath/llmath.h @@ -85,7 +85,7 @@ const F32 F_ALMOST_ONE = 1.0f - F_ALMOST_ZERO; const F32 FP_MAG_THRESHOLD = 0.0000001f; // TODO: Replace with logic like is_approx_equal -inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); } +inline bool is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); } // These functions work by interpreting sign+exp+mantissa as an unsigned // integer. 
@@ -111,13 +111,13 @@ inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < // WARNING: Infinity is comparable with F32_MAX and negative // infinity is comparable with F32_MIN -inline BOOL is_approx_equal(F32 x, F32 y) +inline bool is_approx_equal(F32 x, F32 y) { const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02; return (std::abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT); } -inline BOOL is_approx_equal(F64 x, F64 y) +inline bool is_approx_equal(F64 x, F64 y) { const S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02; return (std::abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT); diff --git a/indra/llmath/llmatrix3a.cpp b/indra/llmath/llmatrix3a.cpp index ab077abcb0..ab077abcb0 100644..100755 --- a/indra/llmath/llmatrix3a.cpp +++ b/indra/llmath/llmatrix3a.cpp diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h index adb7e3389d..9916cfd2da 100644..100755 --- a/indra/llmath/llmatrix3a.h +++ b/indra/llmath/llmatrix3a.h @@ -111,7 +111,7 @@ public: protected: - LLVector4a mColumns[3]; + LL_ALIGN_16(LLVector4a mColumns[3]); }; diff --git a/indra/llmath/llmatrix3a.inl b/indra/llmath/llmatrix3a.inl index 37819fea3c..37819fea3c 100644..100755 --- a/indra/llmath/llmatrix3a.inl +++ b/indra/llmath/llmatrix3a.inl diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 27cf5b79f6..d141298f69 100644..100755 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -34,7 +34,7 @@ class LLMatrix4a { public: - LLVector4a mMatrix[4]; + LL_ALIGN_16(LLVector4a mMatrix[4]); inline void clear() { @@ -107,15 +107,14 @@ public: inline void rotate(const LLVector4a& v, LLVector4a& res) { + LLVector4a y,z; + res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); - res.mul(mMatrix[0]); - - LLVector4a y; y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); - y.mul(mMatrix[1]); - - LLVector4a z; z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); + + res.mul(mMatrix[0]); + y.mul(mMatrix[1]); z.mul(mMatrix[2]); res.add(y); 
diff --git a/indra/llmath/llmodularmath.cpp b/indra/llmath/llmodularmath.cpp index cdc20028bf..cdc20028bf 100644..100755 --- a/indra/llmath/llmodularmath.cpp +++ b/indra/llmath/llmodularmath.cpp diff --git a/indra/llmath/llmodularmath.h b/indra/llmath/llmodularmath.h index 0d4d28fadc..0d4d28fadc 100644..100755 --- a/indra/llmath/llmodularmath.h +++ b/indra/llmath/llmodularmath.h diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 1b11e83b4a..7348904c61 100644..100755 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -31,7 +31,6 @@ #include "v3math.h" #include "llvector4a.h" #include <vector> -#include <set> #define OCT_ERRS LL_WARNS("OctreeErrors") @@ -79,16 +78,18 @@ public: typedef LLOctreeTraveler<T> oct_traveler; typedef LLTreeTraveler<T> tree_traveler; - typedef typename std::set<LLPointer<T> > element_list; - typedef typename element_list::iterator element_iter; - typedef typename element_list::const_iterator const_element_iter; + typedef std::vector< LLPointer<T> > element_list; // note: don't remove the whitespace between "> >" + typedef LLPointer<T>* element_iter; + typedef const LLPointer<T>* const_element_iter; typedef typename std::vector<LLTreeListener<T>*>::iterator tree_listener_iter; - typedef typename std::vector<LLOctreeNode<T>* > child_list; + typedef LLOctreeNode<T>** child_list; + typedef LLOctreeNode<T>** child_iter; + typedef LLTreeNode<T> BaseType; typedef LLOctreeNode<T> oct_node; typedef LLOctreeListener<T> oct_listener; - /*void* operator new(size_t size) + void* operator new(size_t size) { return ll_aligned_malloc_16(size); } @@ -96,7 +97,7 @@ public: void operator delete(void* ptr) { ll_aligned_free_16(ptr); - }*/ + } LLOctreeNode( const LLVector4a& center, const LLVector4a& size, @@ -105,6 +106,10 @@ public: : mParent((oct_node*)parent), mOctant(octant) { + //always keep a NULL terminated list to avoid out of bounds exceptions in debug builds + mData.push_back(NULL); + mDataEnd = &mData[0]; + mCenter = 
center; mSize = size; @@ -123,6 +128,16 @@ public: { BaseType::destroyListeners(); + for (U32 i = 0; i < mElementCount; ++i) + { + mData[i]->setBinIndex(-1); + mData[i] = NULL; + } + + mData.clear(); + mData.push_back(NULL); + mDataEnd = &mData[0]; + for (U32 i = 0; i < getChildCount(); i++) { delete getChild(i); @@ -219,12 +234,17 @@ public: } void accept(oct_traveler* visitor) { visitor->visit(this); } - virtual bool isLeaf() const { return mChild.empty(); } + virtual bool isLeaf() const { return mChildCount == 0; } U32 getElementCount() const { return mElementCount; } + bool isEmpty() const { return mElementCount == 0; } element_list& getData() { return mData; } const element_list& getData() const { return mData; } - + element_iter getDataBegin() { return &mData[0]; } + element_iter getDataEnd() { return mDataEnd; } + const_element_iter getDataBegin() const { return &mData[0]; } + const_element_iter getDataEnd() const { return mDataEnd; } + U32 getChildCount() const { return mChildCount; } oct_node* getChild(U32 index) { return mChild[index]; } const oct_node* getChild(U32 index) const { return mChild[index]; } @@ -289,7 +309,7 @@ public: virtual bool insert(T* data) { - if (data == NULL) + if (data == NULL || data->getBinIndex() != -1) { OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" 
<< llendl; return false; @@ -302,13 +322,12 @@ public: if ((getElementCount() < gOctreeMaxCapacity && contains(data->getBinRadius()) || (data->getBinRadius() > getSize()[0] && parent && parent->getElementCount() >= gOctreeMaxCapacity))) { //it belongs here - //if this is a redundant insertion, error out (should never happen) - llassert(mData.find(data) == mData.end()); - - mData.insert(data); + mData.push_back(NULL); + mData[mElementCount] = data; + mElementCount++; + mDataEnd = &mData[mElementCount]; + data->setBinIndex(mElementCount-1); BaseType::insert(data); - - mElementCount = mData.size(); return true; } else @@ -342,10 +361,12 @@ public: if( lt == 0x7 ) { - mData.insert(data); + mData.push_back(NULL); + mData[mElementCount] = data; + mElementCount++; + mDataEnd = &mData[mElementCount]; + data->setBinIndex(mElementCount-1); BaseType::insert(data); - - mElementCount = mData.size(); return true; } @@ -394,23 +415,58 @@ public: return false; } + void _remove(T* data, S32 i) + { //precondition -- mElementCount > 0, idx is in range [0, mElementCount) + + mElementCount--; + data->setBinIndex(-1); + + if (mElementCount > 0) + { + if (mElementCount != i) + { + mData[i] = mData[mElementCount]; //might unref data, do not access data after this point + mData[i]->setBinIndex(i); + } + + mData[mElementCount] = NULL; + mData.pop_back(); + mDataEnd = &mData[mElementCount]; + } + else + { + mData.clear(); + mData.push_back(NULL); + mDataEnd = &mData[0]; + } + + notifyRemoval(data); + checkAlive(); + } + bool remove(T* data) { - if (mData.find(data) != mData.end()) - { //we have data - mData.erase(data); - mElementCount = mData.size(); - notifyRemoval(data); - checkAlive(); - return true; - } - else if (isInside(data)) + S32 i = data->getBinIndex(); + + if (i >= 0 && i < mElementCount) + { + if (mData[i] == data) + { //found it + _remove(data, i); + llassert(data->getBinIndex() == -1); + return true; + } + } + + if (isInside(data)) { oct_node* dest = getNodeAt(data); if (dest 
!= this) { - return dest->remove(data); + bool ret = dest->remove(data); + llassert(data->getBinIndex() == -1); + return ret; } } @@ -427,21 +483,22 @@ public: } //node is now root - llwarns << "!!! OCTREE REMOVING FACE BY ADDRESS, SEVERE PERFORMANCE PENALTY |||" << llendl; + llwarns << "!!! OCTREE REMOVING ELEMENT BY ADDRESS, SEVERE PERFORMANCE PENALTY |||" << llendl; node->removeByAddress(data); + llassert(data->getBinIndex() == -1); return true; } void removeByAddress(T* data) { - if (mData.find(data) != mData.end()) + for (U32 i = 0; i < mElementCount; ++i) { - mData.erase(data); - mElementCount = mData.size(); - notifyRemoval(data); - llwarns << "FOUND!" << llendl; - checkAlive(); - return; + if (mData[i] == data) + { //we have data + _remove(data, i); + llwarns << "FOUND!" << llendl; + return; + } } for (U32 i = 0; i < getChildCount(); i++) @@ -453,8 +510,8 @@ public: void clearChildren() { - mChild.clear(); mChildCount = 0; + U32* foo = (U32*) mChildMap; foo[0] = foo[1] = 0xFFFFFFFF; } @@ -516,7 +573,7 @@ public: mChildMap[child->getOctant()] = mChildCount; - mChild.push_back(child); + mChild[mChildCount] = child; ++mChildCount; child->setParent(this); @@ -543,9 +600,12 @@ public: mChild[index]->destroy(); delete mChild[index]; } - mChild.erase(mChild.begin() + index); + --mChildCount; + mChild[index] = mChild[mChildCount]; + + //rebuild child map U32* foo = (U32*) mChildMap; foo[0] = foo[1] = 0xFFFFFFFF; @@ -601,11 +661,12 @@ protected: oct_node* mParent; U8 mOctant; - child_list mChild; + LLOctreeNode<T>* mChild[8]; U8 mChildMap[8]; U32 mChildCount; element_list mData; + element_iter mDataEnd; U32 mElementCount; }; diff --git a/indra/llmath/llperlin.cpp b/indra/llmath/llperlin.cpp index e1da2bf92b..e1da2bf92b 100644..100755 --- a/indra/llmath/llperlin.cpp +++ b/indra/llmath/llperlin.cpp diff --git a/indra/llmath/llperlin.h b/indra/llmath/llperlin.h index 40cf19d1ec..40cf19d1ec 100644..100755 --- a/indra/llmath/llperlin.h +++ b/indra/llmath/llperlin.h diff 
--git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index a611894721..3c32441b11 100644..100755 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,6 +36,8 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) + +LL_ALIGN_PREFIX(16) class LLPlane { public: @@ -94,7 +96,7 @@ public: private: LLVector4a mV; -}; +} LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llquantize.h b/indra/llmath/llquantize.h index 1595dbecf8..1595dbecf8 100644..100755 --- a/indra/llmath/llquantize.h +++ b/indra/llmath/llquantize.h diff --git a/indra/llmath/llquaternion.cpp b/indra/llmath/llquaternion.cpp index 7381d5eb99..7381d5eb99 100644..100755 --- a/indra/llmath/llquaternion.cpp +++ b/indra/llmath/llquaternion.cpp diff --git a/indra/llmath/llquaternion.h b/indra/llmath/llquaternion.h index ca0dfe206b..ca0dfe206b 100644..100755 --- a/indra/llmath/llquaternion.h +++ b/indra/llmath/llquaternion.h diff --git a/indra/llmath/llquaternion2.h b/indra/llmath/llquaternion2.h index fd9c0cf3ab..fd9c0cf3ab 100644..100755 --- a/indra/llmath/llquaternion2.h +++ b/indra/llmath/llquaternion2.h diff --git a/indra/llmath/llquaternion2.inl b/indra/llmath/llquaternion2.inl index 2a6987552d..2a6987552d 100644..100755 --- a/indra/llmath/llquaternion2.inl +++ b/indra/llmath/llquaternion2.inl diff --git a/indra/llmath/llrect.cpp b/indra/llmath/llrect.cpp index 4083c99768..4083c99768 100644..100755 --- a/indra/llmath/llrect.cpp +++ b/indra/llmath/llrect.cpp diff --git a/indra/llmath/llrect.h b/indra/llmath/llrect.h index c51e0e0ae6..c51e0e0ae6 100644..100755 --- a/indra/llmath/llrect.h +++ b/indra/llmath/llrect.h diff --git a/indra/llmath/llsdutil_math.cpp b/indra/llmath/llsdutil_math.cpp index 591f7fde36..591f7fde36 100644..100755 --- a/indra/llmath/llsdutil_math.cpp +++ b/indra/llmath/llsdutil_math.cpp diff --git a/indra/llmath/llsdutil_math.h b/indra/llmath/llsdutil_math.h index 0ea78cd231..0ea78cd231 100644..100755 --- a/indra/llmath/llsdutil_math.h +++ 
b/indra/llmath/llsdutil_math.h diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index c7cdf7b32c..cebd2ace7d 100644..100755 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,39 +39,10 @@ #include <stdint.h> #endif -template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast<T*>( - (reinterpret_cast<uintptr_t>(address) + 0xF) & ~0xF); -} - -template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast<T*>( - (reinterpret_cast<uintptr_t>(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - - - #include <xmmintrin.h> #include <emmintrin.h> +#include "llmemory.h" #include "llsimdtypes.h" #include "llsimdtypes.inl" diff --git a/indra/llmath/llsimdtypes.h b/indra/llmath/llsimdtypes.h index bd991d0e71..bd991d0e71 100644..100755 --- a/indra/llmath/llsimdtypes.h +++ b/indra/llmath/llsimdtypes.h diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl index 712239e425..e905c84954 100644..100755 --- a/indra/llmath/llsimdtypes.inl +++ b/indra/llmath/llsimdtypes.inl @@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) inline LLSimdScalar operator-(const LLSimdScalar& a) { static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + ll_assert_aligned(signMask,16); return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a); } @@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) inline LLSimdScalar LLSimdScalar::getAbs() const { static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 
0x7FFFFFFF, 0x7FFFFFFF }; + ll_assert_aligned(F_ABS_MASK_4A,16); return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); } diff --git a/indra/llmath/llsphere.cpp b/indra/llmath/llsphere.cpp index 740047b93a..740047b93a 100644..100755 --- a/indra/llmath/llsphere.cpp +++ b/indra/llmath/llsphere.cpp diff --git a/indra/llmath/llsphere.h b/indra/llmath/llsphere.h index 7c60a11406..7c60a11406 100644..100755 --- a/indra/llmath/llsphere.h +++ b/indra/llmath/llsphere.h diff --git a/indra/llmath/lltreenode.h b/indra/llmath/lltreenode.h index c66bc26176..c66bc26176 100644..100755 --- a/indra/llmath/lltreenode.h +++ b/indra/llmath/lltreenode.h diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b66b7a7076..570fa41a43 100644..100755 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,6 +24,7 @@ * $/LicenseInfo$ */ +#include "llmemory.h" #include "llmath.h" #include "llquantize.h" @@ -40,52 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); 
- - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) @@ -189,6 +145,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) LLVector4a oneOverDelta; { static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; + ll_assert_aligned(F_TWO_4A,16); + LLVector4a two; two.load4a( F_TWO_4A ); // Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509d..79d0a44551 100644..100755 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation; #include <assert.h> #include "llpreprocessor.h" +#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -45,7 +46,9 @@ class LLRotation; // of this writing, July 08, 2010) about getting it implemented before you resort to // LLVector3/LLVector4. ///////////////////////////////// +class LLVector4a; +LL_ALIGN_PREFIX(16) class LLVector4a { public: @@ -82,6 +85,7 @@ public: } // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. + // Source and dest must be 16-byte aligned and size must be multiple of 16. static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); //////////////////////////////////// @@ -90,6 +94,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary + ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) @@ -232,6 +237,11 @@ public: // Note that this does not consider zero length vectors! inline void normalize3fast(); + // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. 
W component is destroyed + // Same as above except substitutes default vector contents if the vector is non-finite or degenerate due to zero length. + // + inline void normalize3fast_checked(LLVector4a* d = 0); + // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const; @@ -313,7 +323,7 @@ public: private: LLQuad mQ; -}; +} LL_ALIGN_POSTFIX(16); inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7ad22a5631..69d3d01efe 100644..100755 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -409,6 +409,26 @@ inline void LLVector4a::normalize3fast() mQ = _mm_mul_ps( mQ, approxRsqrt ); } +inline void LLVector4a::normalize3fast_checked(LLVector4a* d) +{ + if (!isFinite3()) + { + *this = d ? *d : LLVector4a(0,1,0,1); + return; + } + + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); + + if (lenSqrd.getF32ptr()[0] <= FLT_EPSILON) + { + *this = d ? 
*d : LLVector4a(0,1,0,1); + return; + } + + const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + mQ = _mm_mul_ps( mQ, approxRsqrt ); +} + // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const { @@ -460,21 +480,19 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs) mQ = _mm_max_ps(lhs.mQ, rhs.mQ); } -// Set this to (c * lhs) + rhs * ( 1 - c) +// Set this to lhs + (rhs-lhs)*c inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c) { - LLVector4a a = lhs; - a.mul(c); - - LLVector4a b = rhs; - b.mul(1.f-c); - - setAdd(a, b); + LLVector4a t; + t.setSub(rhs,lhs); + t.mul(c); + setAdd(lhs, t); } inline LLBool32 LLVector4a::isFinite3() const { static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + ll_assert_aligned(nanOrInfMask,16); const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask); const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index dd66b09d43..c5698f7cea 100644..100755 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,6 +27,7 @@ #ifndef LL_VECTOR4LOGICAL_H #define LL_VECTOR4LOGICAL_H +#include "llmemory.h" //////////////////////////// // LLVector4Logical @@ -77,6 +78,7 @@ public: inline LLVector4Logical& invert() { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + ll_assert_aligned(allOnes,16); mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); return *this; } diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cc9744756f..f74c934b21 100644..100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -35,7 +35,6 @@ #include 
<cmath> #include "llerror.h" -#include "llmemtype.h" #include "llvolumemgr.h" #include "v2math.h" @@ -95,17 +94,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; -void assert_aligned(void* ptr, uintptr_t alignment) -{ -#if 0 - uintptr_t t = (uintptr_t) ptr; - if (t%alignment != 0) - { - llerrs << "Alignment check failed." << llendl; - } -#endif -} - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -148,6 +136,83 @@ BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* cent return true; } +// Finds tangent vec based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. +void calc_tangent_from_triangle( + LLVector4a& normal, + LLVector4a& tangent_out, + const LLVector4a& v1, + const LLVector2& w1, + const LLVector4a& v2, + const LLVector2& w2, + const LLVector4a& v3, + const LLVector2& w3) +{ + const F32* v1ptr = v1.getF32ptr(); + const F32* v2ptr = v2.getF32ptr(); + const F32* v3ptr = v3.getF32ptr(); + + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; + + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; + + F32 rd = s1*t2-s2*t1; + + float r = ((rd*rd) > FLT_EPSILON) ? (1.0f / rd) + : ((rd > 0.0f) ? 
1024.f : -1024.f); //some made up large ratio for division by zero + + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir( + (t2 * x1 - t1 * x2) * r, + (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + + LLVector4a tdir( + (s1 * x2 - s2 * x1) * r, + (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); + + LLVector4a n = normal; + LLVector4a t = sdir; + + LLVector4a ncrosst; + ncrosst.setCross3(n,t); + + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); + + LLVector4a tsubn; + tsubn.setSub(t,n); + + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast_checked(); + + // Calculate handedness + F32 handedness = ncrosst.dot3(tdir).getF32() < 0.f ? -1.f : 1.f; + + tsubn.getF32ptr()[3] = handedness; + + tangent_out = tsubn; + } + else + { + // degenerate, make up a value + // + tangent_out.set(0,0,1,1); + } + +} // intersect test between triangle vert0, vert1, vert2 and a ray from orig in direction dir. @@ -328,16 +393,16 @@ public: LLVector4a& min = node->mExtents[0]; LLVector4a& max = node->mExtents[1]; - if (!branch->getData().empty()) + if (!branch->isEmpty()) { //node has data, find AABB that binds data set - const LLVolumeTriangle* tri = *(branch->getData().begin()); + const LLVolumeTriangle* tri = *(branch->getDataBegin()); //initialize min/max to first available vertex min = *(tri->mV[0]); max = *(tri->mV[0]); for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = - branch->getData().begin(); iter != branch->getData().end(); ++iter) + branch->getDataBegin(); iter != branch->getDataEnd(); ++iter) { //for each triangle in node //stretch by triangles in node @@ -352,7 +417,7 @@ public: max.setMax(max, *tri->mV[2]); } } - else if (!branch->getChildren().empty()) + else if (!branch->isLeaf()) { //no data, but child nodes exist LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0); @@ -389,8 +454,6 @@ public: LLProfile::Face* LLProfile::addCap(S16 faceID) { - 
LLMemType m1(LLMemType::MTYPE_VOLUME); - Face *face = vector_append(mFaces, 1); face->mIndex = 0; @@ -403,8 +466,6 @@ LLProfile::Face* LLProfile::addCap(S16 faceID) LLProfile::Face* LLProfile::addFace(S32 i, S32 count, F32 scaleU, S16 faceID, BOOL flat) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - Face *face = vector_append(mFaces, 1); face->mIndex = i; @@ -420,7 +481,6 @@ LLProfile::Face* LLProfile::addFace(S32 i, S32 count, F32 scaleU, S16 faceID, BO //static S32 LLProfile::getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset, F32 bevel, F32 ang_scale, S32 split) { // this is basically LLProfile::genNGon stripped down to only the operations that influence the number of points - LLMemType m1(LLMemType::MTYPE_VOLUME); S32 np = 0; // Generate an n-sided "circular" path. @@ -486,14 +546,12 @@ S32 LLProfile::getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 of // filleted and chamfered corners void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F32 bevel, F32 ang_scale, S32 split) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - // Generate an n-sided "circular" path. // 0 is (1,0), and we go counter-clockwise along a circular path from there. const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f }; F32 scale = 0.5f; F32 t, t_step, t_first, t_fraction, ang, ang_step; - LLVector3 pt1,pt2; + LLVector4a pt1,pt2; F32 begin = params.getBegin(); F32 end = params.getEnd(); @@ -516,20 +574,21 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // Starting t and ang values for the first face t = t_first; ang = 2.0f*F_PI*(t*ang_scale + offset); - pt1.setVec(cos(ang)*scale,sin(ang)*scale, t); + pt1.set(cos(ang)*scale,sin(ang)*scale, t); // Increment to the next point. 
// pt2 is the end point on the fractional face t += t_step; ang += ang_step; - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); t_fraction = (begin - t_first)*sides; // Only use if it's not almost exactly on an edge. if (t_fraction < 0.9999f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); mProfile.push_back(new_pt); } @@ -537,12 +596,17 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 while (t < end) { // Iterate through all the integer steps of t. - pt1.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt1.set(cos(ang)*scale,sin(ang)*scale,t); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + LLVector4a new_pt; + new_pt.setSub(pt1, p); + new_pt.mul(1.0f/(float)(split+1) * (float)(i+1)); + new_pt.add(p); + mProfile.push_back(new_pt); } } mProfile.push_back(pt1); @@ -555,18 +619,25 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // pt1 is the first point on the fractional face // pt2 is the end point on the fractional face - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); // Find the fraction that we need to add to the end point. 
t_fraction = (end - (t - t_step))*sides; if (t_fraction > 0.0001f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + + LLVector4a pt1; + pt1.setSub(new_pt, p); + pt1.mul(1.0f/(float)(split+1) * (float)(i+1)); + pt1.add(p); + mProfile.push_back(pt1); } } mProfile.push_back(new_pt); @@ -587,7 +658,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 if (params.getHollow() <= 0) { // put center point if not hollow. - mProfile.push_back(LLVector3(0,0,0)); + mProfile.push_back(LLVector4a(0,0,0)); } } else @@ -600,103 +671,6 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 mTotal = mProfile.size(); } -void LLProfile::genNormals(const LLProfileParams& params) -{ - S32 count = mProfile.size(); - - S32 outer_count; - if (mTotalOut) - { - outer_count = mTotalOut; - } - else - { - outer_count = mTotal / 2; - } - - mEdgeNormals.resize(count * 2); - mEdgeCenters.resize(count * 2); - mNormals.resize(count); - - LLVector2 pt0,pt1; - - BOOL hollow = (params.getHollow() > 0); - - S32 i0, i1, i2, i3, i4; - - // Parametrically generate normal - for (i2 = 0; i2 < count; i2++) - { - mNormals[i2].mV[0] = mProfile[i2].mV[0]; - mNormals[i2].mV[1] = mProfile[i2].mV[1]; - if (hollow && (i2 >= outer_count)) - { - mNormals[i2] *= -1.f; - } - if (mNormals[i2].magVec() < 0.001) - { - // Special case for point at center, get adjacent points. - i1 = (i2 - 1) >= 0 ? i2 - 1 : count - 1; - i0 = (i1 - 1) >= 0 ? i1 - 1 : count - 1; - i3 = (i2 + 1) < count ? i2 + 1 : 0; - i4 = (i3 + 1) < count ? 
i3 + 1 : 0; - - pt0.setVec(mProfile[i1].mV[VX] + mProfile[i1].mV[VX] - mProfile[i0].mV[VX], - mProfile[i1].mV[VY] + mProfile[i1].mV[VY] - mProfile[i0].mV[VY]); - pt1.setVec(mProfile[i3].mV[VX] + mProfile[i3].mV[VX] - mProfile[i4].mV[VX], - mProfile[i3].mV[VY] + mProfile[i3].mV[VY] - mProfile[i4].mV[VY]); - - mNormals[i2] = pt0 + pt1; - mNormals[i2] *= 0.5f; - } - mNormals[i2].normVec(); - } - - S32 num_normal_sets = isConcave() ? 2 : 1; - for (S32 normal_set = 0; normal_set < num_normal_sets; normal_set++) - { - S32 point_num; - for (point_num = 0; point_num < mTotal; point_num++) - { - LLVector3 point_1 = mProfile[point_num]; - point_1.mV[VZ] = 0.f; - - LLVector3 point_2; - - if (isConcave() && normal_set == 0 && point_num == (mTotal - 1) / 2) - { - point_2 = mProfile[mTotal - 1]; - } - else if (isConcave() && normal_set == 1 && point_num == mTotal - 1) - { - point_2 = mProfile[(mTotal - 1) / 2]; - } - else - { - LLVector3 delta_pos; - S32 neighbor_point = (point_num + 1) % mTotal; - while(delta_pos.magVecSquared() < 0.01f * 0.01f) - { - point_2 = mProfile[neighbor_point]; - delta_pos = point_2 - point_1; - neighbor_point = (neighbor_point + 1) % mTotal; - if (neighbor_point == point_num) - { - break; - } - } - } - - point_2.mV[VZ] = 0.f; - LLVector3 face_normal = (point_2 - point_1) % LLVector3::z_axis; - face_normal.normVec(); - mEdgeNormals[normal_set * count + point_num] = face_normal; - mEdgeCenters[normal_set * count + point_num] = lerp(point_1, point_2, 0.5f); - } - } -} - - // Hollow is percent of the original bounding box, not of this particular // profile's geometry. Thus, a swept triangle needs lower hollow values than // a swept square. 
@@ -712,12 +686,13 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3 Face *face = addFace(mTotalOut, mTotal-mTotalOut,0,LL_FACE_INNER_SIDE, flat); - std::vector<LLVector3> pt; + static LLAlignedArray<LLVector4a,64> pt; pt.resize(mTotal) ; for (S32 i=mTotalOut;i<mTotal;i++) { - pt[i] = mProfile[i] * box_hollow; + pt[i] = mProfile[i]; + pt[i].mul(box_hollow); } S32 j=mTotal-1; @@ -741,8 +716,6 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3 S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split, BOOL is_sculpted, S32 sculpt_size) { // this is basically LLProfile::generate stripped down to only operations that influence the number of points - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (detail < MIN_LOD) { detail = MIN_LOD; @@ -853,8 +826,6 @@ S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 de BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split, BOOL is_sculpted, S32 sculpt_size) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if ((!mDirty) && (!is_sculpted)) { return FALSE; @@ -867,8 +838,8 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai detail = MIN_LOD; } - mProfile.clear(); - mFaces.clear(); + mProfile.resize(0); + mFaces.resize(0); // Generate the face data S32 i; @@ -900,10 +871,13 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai addFace((face_num++) * (split +1), split+2, 1, LL_FACE_OUTER_SIDE_0 << i, TRUE); } + LLVector4a scale(1,1,4,1); + for (i = 0; i <(S32) mProfile.size(); i++) { // Scale by 4 to generate proper tex coords. 
- mProfile[i].mV[2] *= 4.f; + mProfile[i].mul(scale); + llassert(mProfile[i].isFinite3()); } if (hollow) @@ -936,10 +910,12 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai case LL_PCODE_PROFILE_EQUALTRI: { genNGon(params, 3,0, 0, 1, split); + LLVector4a scale(1,1,3,1); for (i = 0; i <(S32) mProfile.size(); i++) { // Scale by 3 to generate proper tex coords. - mProfile[i].mV[2] *= 3.f; + mProfile[i].mul(scale); + llassert(mProfile[i].isFinite3()); } if (path_open) @@ -1118,8 +1094,6 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai } } - //genNormals(params); - return TRUE; } @@ -1127,8 +1101,6 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai BOOL LLProfileParams::importFile(LLFILE *fp) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ // *NOTE: changing the size or type of these buffers will require @@ -1204,8 +1176,6 @@ BOOL LLProfileParams::exportFile(LLFILE *fp) const BOOL LLProfileParams::importLegacyStream(std::istream& input_stream) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ // *NOTE: changing the size or type of these buffers will require @@ -1297,7 +1267,6 @@ bool LLProfileParams::fromLLSD(LLSD& sd) void LLProfileParams::copyParams(const LLProfileParams ¶ms) { - LLMemType m1(LLMemType::MTYPE_VOLUME); setCurveType(params.getCurveType()); setBegin(params.getBegin()); setEnd(params.getEnd()); @@ -1407,25 +1376,29 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en // the path begins at the correct cut. 
F32 step= 1.0f / sides; F32 t = params.getBegin(); - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; s = sin(ang)*lerp(radius_start, radius_end, t); c = cos(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; - + // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + + LLMatrix3 rot(twist * qang); + + pt->mRot.loadu(rot); t+=step; @@ -1436,50 +1409,54 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en // Run through the non-cut dependent points. 
while (t < params.getEnd()) { - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); t+=step; } // Make one final pass for the end cut. t = params.getEnd(); - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; - + // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. 
qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); mTotal = mPath.size(); } @@ -1514,8 +1491,6 @@ const LLVector2 LLPathParams::getEndScale() const S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail) { // this is basically LLPath::generate stripped down to only the operations that influence the number of points - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (detail < MIN_LOD) { detail = MIN_LOD; @@ -1565,8 +1540,6 @@ S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail) BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, BOOL is_sculpted, S32 sculpt_size) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if ((!mDirty) && (!is_sculpted)) { return FALSE; @@ -1581,7 +1554,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, mDirty = FALSE; S32 np = 2; // hardcode for line - mPath.clear(); + mPath.resize(0); mOpen = TRUE; // Is this 0xf0 mask really necessary? DK 03/02/05 @@ -1607,12 +1580,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, for (S32 i=0;i<np;i++) { F32 t = lerp(params.getBegin(),params.getEnd(),(F32)i * mStep); - mPath[i].mPos.setVec(lerp(0,params.getShear().mV[0],t), + mPath[i].mPos.set(lerp(0,params.getShear().mV[0],t), lerp(0,params.getShear().mV[1],t), t - 0.5f); - mPath[i].mRot.setQuat(lerp(F_PI * params.getTwistBegin(),F_PI * params.getTwist(),t),0,0,1); - mPath[i].mScale.mV[0] = lerp(start_scale.mV[0],end_scale.mV[0],t); - mPath[i].mScale.mV[1] = lerp(start_scale.mV[1],end_scale.mV[1],t); + LLQuaternion quat; + quat.setQuat(lerp(F_PI * params.getTwistBegin(),F_PI * params.getTwist(),t),0,0,1); + LLMatrix3 tmp(quat); + mPath[i].mRot.loadu(tmp); + mPath[i].mScale.set(lerp(start_scale.mV[0],end_scale.mV[0],t), + lerp(start_scale.mV[1],end_scale.mV[1],t), + 0,1); mPath[i].mTexT = t; } } @@ -1626,7 +1603,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, S32 sides = 
(S32)llfloor(llfloor((MIN_DETAIL_FACES * detail + twist_mag * 3.5f * (detail-0.5f))) * params.getRevolutions()); if (is_sculpted) - sides = sculpt_size; + sides = llmax(sculpt_size, 1); genNGon(params, sides); } @@ -1649,7 +1626,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, F32 toggle = 0.5f; for (S32 i=0;i<(S32)mPath.size();i++) { - mPath[i].mPos.mV[0] = toggle; + mPath[i].mPos.getF32ptr()[0] = toggle; if (toggle == 0.5f) toggle = -0.5f; else @@ -1670,13 +1647,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, for (S32 i=0;i<np;i++) { F32 t = (F32)i * mStep; - mPath[i].mPos.setVec(0, + mPath[i].mPos.set(0, lerp(0, -sin(F_PI*params.getTwist()*t)*0.5f,t), lerp(-0.5, cos(F_PI*params.getTwist()*t)*0.5f,t)); - mPath[i].mScale.mV[0] = lerp(1,params.getScale().mV[0],t); - mPath[i].mScale.mV[1] = lerp(1,params.getScale().mV[1],t); + mPath[i].mScale.set(lerp(1,params.getScale().mV[0],t), + lerp(1,params.getScale().mV[1],t), 0,1); mPath[i].mTexT = t; - mPath[i].mRot.setQuat(F_PI * params.getTwist() * t,1,0,0); + LLQuaternion quat; + quat.setQuat(F_PI * params.getTwist() * t,1,0,0); + LLMatrix3 tmp(quat); + mPath[i].mRot.loadu(tmp); } break; @@ -1694,19 +1674,21 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, BOOL LLDynamicPath::generate(const LLPathParams& params, F32 detail, S32 split, BOOL is_sculpted, S32 sculpt_size) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - mOpen = TRUE; // Draw end caps if (getPathLength() == 0) { // Path hasn't been generated yet. // Some algorithms later assume at least TWO path points. 
resizePath(2); + LLQuaternion quat; + quat.setQuat(0,0,0); + LLMatrix3 tmp(quat); + for (U32 i = 0; i < 2; i++) { - mPath[i].mPos.setVec(0, 0, 0); - mPath[i].mRot.setQuat(0, 0, 0); - mPath[i].mScale.setVec(1, 1); + mPath[i].mPos.set(0, 0, 0); + mPath[i].mRot.loadu(tmp); + mPath[i].mScale.set(1, 1, 0, 1); mPath[i].mTexT = 0; } } @@ -1717,8 +1699,6 @@ BOOL LLDynamicPath::generate(const LLPathParams& params, F32 detail, S32 split, BOOL LLPathParams::importFile(LLFILE *fp) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ // *NOTE: changing the size or type of these buffers will require @@ -1863,8 +1843,6 @@ BOOL LLPathParams::exportFile(LLFILE *fp) const BOOL LLPathParams::importLegacyStream(std::istream& input_stream) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ // *NOTE: changing the size or type of these buffers will require @@ -2072,8 +2050,6 @@ S32 LLVolume::sNumMeshPoints = 0; LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL generate_single_face, const BOOL is_unique) : mParams(params) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - mUnique = is_unique; mFaceMask = 0x0; mDetail = detail; @@ -2119,9 +2095,9 @@ void LLVolume::regen() createVolumeFaces(); } -void LLVolume::genBinormals(S32 face) +void LLVolume::genTangents(S32 face) { - mVolumeFaces[face].createBinormals(); + mVolumeFaces[face].createTangents(); } LLVolume::~LLVolume() @@ -2145,7 +2121,7 @@ LLVolume::~LLVolume() BOOL LLVolume::generate() { - LLMemType m1(LLMemType::MTYPE_VOLUME); + LL_CHECK_MEMORY llassert_always(mProfilep); //Added 10.03.05 Dave Parks @@ -2182,20 +2158,6 @@ BOOL LLVolume::generate() mLODScaleBias.setVec(0.6f, 0.6f, 0.6f); } - //******************************************************************** - //debug info, to be removed - if((U32)(mPathp->mPath.size() * mProfilep->mProfile.size()) > (1u << 20)) - { - llinfos << 
"sizeS: " << mPathp->mPath.size() << " sizeT: " << mProfilep->mProfile.size() << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split); BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split); @@ -2204,21 +2166,6 @@ BOOL LLVolume::generate() S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); - //******************************************************************** - //debug info, to be removed - if((U32)(sizeS * sizeT) > (1u << 20)) - { - llinfos << "regenPath: " << (S32)regenPath << " regenProf: " << (S32)regenProf << llendl ; - llinfos << "sizeS: " << sizeS << " sizeT: " << sizeT << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - sNumMeshPoints -= mMesh.size(); mMesh.resize(sizeT * sizeS); sNumMeshPoints += mMesh.size(); @@ -2226,22 +2173,39 @@ BOOL LLVolume::generate() //generate vertex positions // Run along the path. 
+ LLVector4a* dst = mMesh.mArray; + for (S32 s = 0; s < sizeS; ++s) { - LLVector2 scale = mPathp->mPath[s].mScale; - LLQuaternion rot = mPathp->mPath[s].mRot; + F32* scale = mPathp->mPath[s].mScale.getF32ptr(); + + F32 sc [] = + { scale[0], 0, 0, 0, + 0, scale[1], 0, 0, + 0, 0, scale[2], 0, + 0, 0, 0, 1 }; + + LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix); + LLMatrix4 scale_mat(sc); + + scale_mat *= rot; + + LLMatrix4a rot_mat; + rot_mat.loadu(scale_mat); + + LLVector4a* profile = mProfilep->mProfile.mArray; + LLVector4a* end_profile = profile+sizeT; + LLVector4a offset = mPathp->mPath[s].mPos; + + LLVector4a tmp; // Run along the profile. - for (S32 t = 0; t < sizeT; ++t) + while (profile < end_profile) { - S32 m = s*sizeT + t; - Point& pt = mMesh[m]; - - pt.mPos.mV[0] = mProfilep->mProfile[t].mV[0] * scale.mV[0]; - pt.mPos.mV[1] = mProfilep->mProfile[t].mV[1] * scale.mV[1]; - pt.mPos.mV[2] = 0.0f; - pt.mPos = pt.mPos * rot; - pt.mPos += mPathp->mPath[s].mPos; + rot_mat.rotate(*profile++, tmp); + dst->setAdd(tmp,offset); + llassert(dst->isFinite3()); + ++dst; } } @@ -2251,9 +2215,11 @@ BOOL LLVolume::generate() LLFaceID id = iter->mFaceID; mFaceMask |= id; } - + LL_CHECK_MEMORY return TRUE; } + + LL_CHECK_MEMORY return FALSE; } @@ -2483,6 +2449,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) LLVector4a pos_range; pos_range.setSub(max_pos, min_pos); LLVector2 tc_range2 = max_tc - min_tc; + LLVector4a tc_range; tc_range.set(tc_range2[0], tc_range2[1], tc_range2[0], tc_range2[1]); LLVector4a min_tc4(min_tc[0], min_tc[1], min_tc[0], min_tc[1]); @@ -2741,8 +2708,6 @@ S32 LLVolume::getNumFaces() const void LLVolume::createVolumeFaces() { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (mGenerateSingleFace) { // do nothing @@ -2833,13 +2798,15 @@ void LLVolume::createVolumeFaces() } -inline LLVector3 sculpt_rgb_to_vector(U8 r, U8 g, U8 b) +inline LLVector4a sculpt_rgb_to_vector(U8 r, U8 g, U8 b) { // maps RGB values to vector values [0..255] -> 
[-0.5..0.5] - LLVector3 value; - value.mV[VX] = r / 255.f - 0.5f; - value.mV[VY] = g / 255.f - 0.5f; - value.mV[VZ] = b / 255.f - 0.5f; + LLVector4a value; + LLVector4a sub(0.5f, 0.5f, 0.5f); + + value.set(r,g,b); + value.mul(1.f/255.f); + value.sub(sub); return value; } @@ -2860,21 +2827,21 @@ inline U32 sculpt_st_to_index(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_w } -inline LLVector3 sculpt_index_to_vector(U32 index, const U8* sculpt_data) +inline LLVector4a sculpt_index_to_vector(U32 index, const U8* sculpt_data) { - LLVector3 v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); + LLVector4a v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); return v; } -inline LLVector3 sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_st_to_index(s, t, size_s, size_t, sculpt_width, sculpt_height, sculpt_components); return sculpt_index_to_vector(index, sculpt_data); } -inline LLVector3 sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_xy_to_index(x, y, sculpt_width, sculpt_height, sculpt_components); @@ -2896,15 +2863,26 @@ F32 LLVolume::sculptGetSurfaceArea() for (S32 t = 0; t < sizeT-1; t++) { // get four corners of quad - LLVector3 p1 = mMesh[(s )*sizeT + (t )].mPos; - LLVector3 p2 = mMesh[(s+1)*sizeT + (t )].mPos; - LLVector3 p3 = mMesh[(s )*sizeT + (t+1)].mPos; - LLVector3 p4 = mMesh[(s+1)*sizeT + (t+1)].mPos; + LLVector4a& p1 = mMesh[(s )*sizeT + (t )]; + LLVector4a& p2 = mMesh[(s+1)*sizeT + (t )]; + 
LLVector4a& p3 = mMesh[(s )*sizeT + (t+1)]; + LLVector4a& p4 = mMesh[(s+1)*sizeT + (t+1)]; // compute the area of the quad by taking the length of the cross product of the two triangles - LLVector3 cross1 = (p1 - p2) % (p1 - p3); - LLVector3 cross2 = (p4 - p2) % (p4 - p3); - area += (cross1.magVec() + cross2.magVec()) / 2.f; + LLVector4a v0,v1,v2,v3; + v0.setSub(p1,p2); + v1.setSub(p1,p3); + v2.setSub(p4,p2); + v3.setSub(p4,p3); + + LLVector4a cross1, cross2; + cross1.setCross3(v0,v1); + cross2.setCross3(v2,v3); + + //LLVector3 cross1 = (p1 - p2) % (p1 - p3); + //LLVector3 cross2 = (p4 - p2) % (p4 - p3); + + area += (cross1.getLength3() + cross2.getLength3()).getF32() / 2.f; } } @@ -2914,8 +2892,6 @@ F32 LLVolume::sculptGetSurfaceArea() // create placeholder shape void LLVolume::sculptGeneratePlaceholder() { - LLMemType m1(LLMemType::MTYPE_VOLUME); - S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); @@ -2927,7 +2903,7 @@ void LLVolume::sculptGeneratePlaceholder() for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; F32 u = (F32)s/(sizeS-1); @@ -2935,9 +2911,13 @@ void LLVolume::sculptGeneratePlaceholder() const F32 RADIUS = (F32) 0.3; - pt.mPos.mV[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[2] = (F32)(cos(F_PI * v) * RADIUS); + F32* p = pt.getF32ptr(); + + p[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); + p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); + p[2] = (F32)(cos(F_PI * v) * RADIUS); + + llassert(pt.isFinite3()); } line += sizeT; @@ -2952,9 +2932,6 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 BOOL sculpt_mirror = sculpt_type & LL_SCULPT_FLAG_MIRROR; BOOL reverse_horizontal = (sculpt_invert ? 
!sculpt_mirror : sculpt_mirror); // XOR - - LLMemType m1(LLMemType::MTYPE_VOLUME); - S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); @@ -2965,7 +2942,7 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; S32 reversed_t = t; @@ -3022,12 +2999,15 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 } } - pt.mPos = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); + pt = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); if (sculpt_mirror) { - pt.mPos.mV[VX] *= -1.f; + LLVector4a scale(-1.f,1,1,1); + pt.mul(scale); } + + llassert(pt.isFinite3()); } line += sizeT; @@ -3103,7 +3083,6 @@ void sculpt_calc_mesh_resolution(U16 width, U16 height, U8 type, F32 detail, S32 // sculpt replaces generate() for sculpted surfaces void LLVolume::sculpt(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data, S32 sculpt_level) { - LLMemType m1(LLMemType::MTYPE_VOLUME); U8 sculpt_type = mParams.getSculptType(); BOOL data_is_empty = FALSE; @@ -3240,7 +3219,6 @@ bool LLVolumeParams::operator<(const LLVolumeParams ¶ms) const void LLVolumeParams::copyParams(const LLVolumeParams ¶ms) { - LLMemType m1(LLMemType::MTYPE_VOLUME); mProfileParams.copyParams(params.mProfileParams); mPathParams.copyParams(params.mPathParams); mSculptID = params.getSculptID(); @@ -3610,629 +3588,6 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h return true; } -S32 *LLVolume::getTriangleIndices(U32 &num_indices) const -{ - LLMemType m1(LLMemType::MTYPE_VOLUME); - - S32 expected_num_triangle_indices = getNumTriangleIndices(); - if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES) - { - // we don't allow LLVolumes with this many vertices - llwarns << "Couldn't allocate triangle 
indices" << llendl; - num_indices = 0; - return NULL; - } - - S32* index = new S32[expected_num_triangle_indices]; - S32 count = 0; - - // Let's do this totally diffently, as we don't care about faces... - // Counter-clockwise triangles are forward facing... - - BOOL open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - S32 size_s, size_s_out, size_t; - S32 s, t, i; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); - - // NOTE -- if the construction of the triangles below ever changes - // then getNumTriangleIndices() method may also have to be updated. - - if (open) /* Flawfinder: ignore */ - { - if (hollow) - { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 - - for (t = 0; t < size_t - 1; t++) - { - // The outer face, first cut, and inner face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - - // The other cut face - index[count++] = s + t*size_s; // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = s + (t+1)*size_s; // x,y+1 - - index[count++] = s + (t+1)*size_s; // x,y+1 - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; - - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. 
- LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; - } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. 
- LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Open solid - - for (t = 0; t < size_t - 1; t++) - { - // Outer face + 1 cut face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i 
+ 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - - // The other cut face - index[count++] = (size_s - 1) + (t*size_s); // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - index[count++] = 0 + (t*size_s); // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - for (s = 0; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = size_s - 1; - } - - // We've got a top cap - S32 offset = (size_t - 1)*size_s; - for (s = 0; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset + size_s - 1; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - } - else if (hollow) - { - // Closed hollow - // Outer face - - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s_out - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Inner face - // Invert facing from outer face - for (t = 0; t < size_t - 1; t++) - { - for (s = size_s_out; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; - - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since 
you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; - } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - 
// the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Closed solid. Easy case. - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s - 1; s++) - { - // Should wrap properly, but for now... 
- i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // bottom cap - for (s = 1; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = 0; - } - - // top cap - S32 offset = (size_t - 1)*size_s; - for (s = 1; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - -#ifdef LL_DEBUG - // assert that we computed the correct number of indices - if (count != expected_num_triangle_indices ) - { - llerrs << "bad index count prediciton:" - << " expected=" << expected_num_triangle_indices - << " actual=" << count << llendl; - } -#endif - -#if 0 - // verify that each index does not point beyond the size of the mesh - S32 num_vertices = mMesh.size(); - for (i = 0; i < count; i+=3) - { - llinfos << index[i] << ":" << index[i+1] << ":" << index[i+2] << llendl; - llassert(index[i] < num_vertices); - llassert(index[i+1] < num_vertices); - llassert(index[i+2] < num_vertices); - } -#endif - - num_indices = count; - return index; -} - void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) { //attempt to approximate the number of triangles that will result from generating a volume LoD set for the //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost @@ -4250,63 +3605,6 @@ void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) } } -S32 LLVolume::getNumTriangleIndices() const -{ - BOOL profile_open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - - S32 size_s, size_s_out, size_t; - size_s = 
getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); - - S32 count = 0; - if (profile_open) /* Flawfinder: ignore */ - { - if (hollow) - { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - else - { - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - } - else if (hollow) - { - // Closed hollow - // Outer face - count = (size_t - 1) * (size_s_out - 1) * 6; - - // Inner face - count += (size_t - 1) * ((size_s - 1) - size_s_out) * 6; - } - else - { - // Closed solid. Easy case. - count = (size_t - 1) * (size_s - 1) * 6; - } - - if (path_open) - { - S32 cap_triangle_count = size_s - 3; - if ( profile_open - || hollow ) - { - cap_triangle_count = size_s - 2; - } - if ( cap_triangle_count > 0 ) - { - // top and bottom caps - count += cap_triangle_count * 2 * 3; - } - } - return count; -} - S32 LLVolume::getNumTriangles(S32* vcount) const { @@ -4341,8 +3639,6 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, const LLMatrix3& norm_mat_in, S32 face_mask) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - LLMatrix4a mat; mat.loadu(mat_in); @@ -4446,7 +3742,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, segments.push_back(vertices.size()); #if DEBUG_SILHOUETTE_BINORMALS vertices.push_back(face.mVertices[j].getPosition()); - vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f); + vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mTangent*0.1f); normals.push_back(LLVector3(0,0,1)); normals.push_back(LLVector3(0,0,1)); segments.push_back(vertices.size()); @@ -4562,22 +3858,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, } } -S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, - S32 face, - LLVector3* intersection,LLVector2* 
tex_coord, LLVector3* normal, LLVector3* bi_normal) -{ - LLVector4a starta, enda; - starta.load3(start.mV); - enda.load3(end.mV); - - return lineSegmentIntersect(starta, enda, face, intersection, tex_coord, normal, bi_normal); - -} - - S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent_out) { S32 hit_face = -1; @@ -4615,9 +3898,9 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en if (LLLineSegmentBoxIntersect(start, end, box_center, box_size)) { - if (bi_normal != NULL) // if the caller wants binormals, we may need to generate them + if (tangent_out != NULL) // if the caller wants tangents, we may need to generate them { - genBinormals(i); + genTangents(i); } if (isUnique()) @@ -4651,7 +3934,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en LLVector4a intersect = dir; intersect.mul(closest_t); intersect.add(start); - intersection->set(intersect.getF32ptr()); + *intersection = intersect; } @@ -4666,19 +3949,42 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en if (normal!= NULL) { - LLVector4* norm = (LLVector4*) face.mNormals; - - *normal = ((1.f - a - b) * LLVector3(norm[idx0]) + - a * LLVector3(norm[idx1]) + - b * LLVector3(norm[idx2])); + LLVector4a* norm = face.mNormals; + + LLVector4a n1,n2,n3; + n1 = norm[idx0]; + n1.mul(1.f-a-b); + + n2 = norm[idx1]; + n2.mul(a); + + n3 = norm[idx2]; + n3.mul(b); + + n1.add(n2); + n1.add(n3); + + *normal = n1; } - if (bi_normal != NULL) + if (tangent_out != NULL) { - LLVector4* binormal = (LLVector4*) face.mBinormals; - *bi_normal = ((1.f - a - b) * LLVector3(binormal[idx0]) + - a * LLVector3(binormal[idx1]) + - b * LLVector3(binormal[idx2])); + LLVector4a* tangents = face.mTangents; + + LLVector4a t1,t2,t3; 
+ t1 = tangents[idx0]; + t1.mul(1.f-a-b); + + t2 = tangents[idx1]; + t2.mul(a); + + t3 = tangents[idx2]; + t3.mul(b); + + t1.add(t2); + t1.add(t3); + + *tangent_out = t1; } } } @@ -4691,7 +3997,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en face.createOctree(); } - LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal); + LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, tangent_out); intersect.traverse(face.mOctree); if (intersect.mHitFace) { @@ -4804,241 +4110,8 @@ BOOL equalTriangle(const S32 *a, const S32 *b) return FALSE; } -BOOL LLVolume::cleanupTriangleData( const S32 num_input_vertices, - const std::vector<Point>& input_vertices, - const S32 num_input_triangles, - S32 *input_triangles, - S32 &num_output_vertices, - LLVector3 **output_vertices, - S32 &num_output_triangles, - S32 **output_triangles) -{ - LLMemType m1(LLMemType::MTYPE_VOLUME); - - /* Testing: avoid any cleanup - static BOOL skip_cleanup = TRUE; - if ( skip_cleanup ) - { - num_output_vertices = num_input_vertices; - num_output_triangles = num_input_triangles; - - *output_vertices = new LLVector3[num_input_vertices]; - for (S32 index = 0; index < num_input_vertices; index++) - { - (*output_vertices)[index] = input_vertices[index].mPos; - } - - *output_triangles = new S32[num_input_triangles*3]; - memcpy(*output_triangles, input_triangles, 3*num_input_triangles*sizeof(S32)); // Flawfinder: ignore - return TRUE; - } - */ - - // Here's how we do this: - // Create a structure which contains the original vertex index and the - // LLVector3 data. - // "Sort" the data by the vectors - // Create an array the size of the old vertex list, with a mapping of - // old indices to new indices. - // Go through triangles, shift so the lowest index is first - // Sort triangles by first index - // Remove duplicate triangles - // Allocate and pack new triangle data. 
- - //LLTimer cleanupTimer; - //llinfos << "In vertices: " << num_input_vertices << llendl; - //llinfos << "In triangles: " << num_input_triangles << llendl; - - S32 i; - typedef std::multiset<LLVertexIndexPair*, lessVertex> vertex_set_t; - vertex_set_t vertex_list; - - LLVertexIndexPair *pairp = NULL; - for (i = 0; i < num_input_vertices; i++) - { - LLVertexIndexPair *new_pairp = new LLVertexIndexPair(input_vertices[i].mPos, i); - vertex_list.insert(new_pairp); - } - - // Generate the vertex mapping and the list of vertices without - // duplicates. This will crash if there are no vertices. - llassert(num_input_vertices > 0); // check for no vertices! - S32 *vertex_mapping = new S32[num_input_vertices]; - LLVector3 *new_vertices = new LLVector3[num_input_vertices]; - LLVertexIndexPair *prev_pairp = NULL; - - S32 new_num_vertices; - - new_num_vertices = 0; - for (vertex_set_t::iterator iter = vertex_list.begin(), - end = vertex_list.end(); - iter != end; iter++) - { - pairp = *iter; - if (!prev_pairp || ((pairp->mVertex - prev_pairp->mVertex).magVecSquared() >= VERTEX_SLOP_SQRD)) - { - new_vertices[new_num_vertices] = pairp->mVertex; - //llinfos << "Added vertex " << new_num_vertices << " : " << pairp->mVertex << llendl; - new_num_vertices++; - // Update the previous - prev_pairp = pairp; - } - else - { - //llinfos << "Removed duplicate vertex " << pairp->mVertex << ", distance magVecSquared() is " << (pairp->mVertex - prev_pairp->mVertex).magVecSquared() << llendl; - } - vertex_mapping[pairp->mIndex] = new_num_vertices - 1; - } - - // Iterate through triangles and remove degenerates, re-ordering vertices - // along the way. 
- S32 *new_triangles = new S32[num_input_triangles * 3]; - S32 new_num_triangles = 0; - - for (i = 0; i < num_input_triangles; i++) - { - S32 v1 = i*3; - S32 v2 = v1 + 1; - S32 v3 = v1 + 2; - - //llinfos << "Checking triangle " << input_triangles[v1] << ":" << input_triangles[v2] << ":" << input_triangles[v3] << llendl; - input_triangles[v1] = vertex_mapping[input_triangles[v1]]; - input_triangles[v2] = vertex_mapping[input_triangles[v2]]; - input_triangles[v3] = vertex_mapping[input_triangles[v3]]; - - if ((input_triangles[v1] == input_triangles[v2]) - || (input_triangles[v1] == input_triangles[v3]) - || (input_triangles[v2] == input_triangles[v3])) - { - //llinfos << "Removing degenerate triangle " << input_triangles[v1] << ":" << input_triangles[v2] << ":" << input_triangles[v3] << llendl; - // Degenerate triangle, skip - continue; - } - - if (input_triangles[v1] < input_triangles[v2]) - { - if (input_triangles[v1] < input_triangles[v3]) - { - // (0 < 1) && (0 < 2) - new_triangles[new_num_triangles*3] = input_triangles[v1]; - new_triangles[new_num_triangles*3+1] = input_triangles[v2]; - new_triangles[new_num_triangles*3+2] = input_triangles[v3]; - } - else - { - // (0 < 1) && (2 < 0) - new_triangles[new_num_triangles*3] = input_triangles[v3]; - new_triangles[new_num_triangles*3+1] = input_triangles[v1]; - new_triangles[new_num_triangles*3+2] = input_triangles[v2]; - } - } - else if (input_triangles[v2] < input_triangles[v3]) - { - // (1 < 0) && (1 < 2) - new_triangles[new_num_triangles*3] = input_triangles[v2]; - new_triangles[new_num_triangles*3+1] = input_triangles[v3]; - new_triangles[new_num_triangles*3+2] = input_triangles[v1]; - } - else - { - // (1 < 0) && (2 < 1) - new_triangles[new_num_triangles*3] = input_triangles[v3]; - new_triangles[new_num_triangles*3+1] = input_triangles[v1]; - new_triangles[new_num_triangles*3+2] = input_triangles[v2]; - } - new_num_triangles++; - } - - if (new_num_triangles == 0) - { - llwarns << "Created volume object with 0 
faces." << llendl; - delete[] new_triangles; - delete[] vertex_mapping; - delete[] new_vertices; - return FALSE; - } - - typedef std::set<S32*, lessTriangle> triangle_set_t; - triangle_set_t triangle_list; - - for (i = 0; i < new_num_triangles; i++) - { - triangle_list.insert(&new_triangles[i*3]); - } - - // Sort through the triangle list, and delete duplicates - - S32 *prevp = NULL; - S32 *curp = NULL; - - S32 *sorted_tris = new S32[new_num_triangles*3]; - S32 cur_tri = 0; - for (triangle_set_t::iterator iter = triangle_list.begin(), - end = triangle_list.end(); - iter != end; iter++) - { - curp = *iter; - if (!prevp || !equalTriangle(prevp, curp)) - { - //llinfos << "Added triangle " << *curp << ":" << *(curp+1) << ":" << *(curp+2) << llendl; - sorted_tris[cur_tri*3] = *curp; - sorted_tris[cur_tri*3+1] = *(curp+1); - sorted_tris[cur_tri*3+2] = *(curp+2); - cur_tri++; - prevp = curp; - } - else - { - //llinfos << "Skipped triangle " << *curp << ":" << *(curp+1) << ":" << *(curp+2) << llendl; - } - } - - *output_vertices = new LLVector3[new_num_vertices]; - num_output_vertices = new_num_vertices; - for (i = 0; i < new_num_vertices; i++) - { - (*output_vertices)[i] = new_vertices[i]; - } - - *output_triangles = new S32[cur_tri*3]; - num_output_triangles = cur_tri; - memcpy(*output_triangles, sorted_tris, 3*cur_tri*sizeof(S32)); /* Flawfinder: ignore */ - - /* - llinfos << "Out vertices: " << num_output_vertices << llendl; - llinfos << "Out triangles: " << num_output_triangles << llendl; - for (i = 0; i < num_output_vertices; i++) - { - llinfos << i << ":" << (*output_vertices)[i] << llendl; - } - for (i = 0; i < num_output_triangles; i++) - { - llinfos << i << ":" << (*output_triangles)[i*3] << ":" << (*output_triangles)[i*3+1] << ":" << (*output_triangles)[i*3+2] << llendl; - } - */ - - //llinfos << "Out vertices: " << num_output_vertices << llendl; - //llinfos << "Out triangles: " << num_output_triangles << llendl; - delete[] vertex_mapping; - vertex_mapping = 
NULL; - delete[] new_vertices; - new_vertices = NULL; - delete[] new_triangles; - new_triangles = NULL; - delete[] sorted_tris; - sorted_tris = NULL; - triangle_list.clear(); - std::for_each(vertex_list.begin(), vertex_list.end(), DeletePointer()); - vertex_list.clear(); - - return TRUE; -} - - BOOL LLVolumeParams::importFile(LLFILE *fp) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - //llinfos << "importing volume" << llendl; const S32 BUFSIZE = 16384; char buffer[BUFSIZE]; /* Flawfinder: ignore */ @@ -5093,8 +4166,6 @@ BOOL LLVolumeParams::exportFile(LLFILE *fp) const BOOL LLVolumeParams::importLegacyStream(std::istream& input_stream) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - //llinfos << "importing volume" << llendl; const S32 BUFSIZE = 16384; // *NOTE: changing the size or type of this buffer will require @@ -5134,8 +4205,6 @@ BOOL LLVolumeParams::importLegacyStream(std::istream& input_stream) BOOL LLVolumeParams::exportLegacyStream(std::ostream& output_stream) const { - LLMemType m1(LLMemType::MTYPE_VOLUME); - output_stream <<"\tshape 0\n"; output_stream <<"\t{\n"; mPathParams.exportLegacyStream(output_stream); @@ -5471,14 +4540,16 @@ LLVolumeFace::LLVolumeFace() : mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), - mBinormals(NULL), + mTangents(NULL), mTexCoords(NULL), mIndices(NULL), mWeights(NULL), - mOctree(NULL) + mOctree(NULL), + mOptimized(FALSE) { mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); mExtents[0].splat(-0.5f); @@ -5494,10 +4565,11 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), - mBinormals(NULL), + mTangents(NULL), mTexCoords(NULL), mIndices(NULL), mWeights(NULL), @@ -5531,8 +4603,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) freeData(); - LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 
3*sizeof(LLVector4a)); - resizeVertices(src.mNumVertices); resizeIndices(src.mNumIndices); @@ -5542,28 +4612,26 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size); + + if (src.mNormals) + { LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + } if(src.mTexCoords) { LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); } - else - { - ll_aligned_free_16(mTexCoords) ; - mTexCoords = NULL ; - } - - if (src.mBinormals) + if (src.mTangents) { - allocateBinormals(src.mNumVertices); - LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) src.mBinormals, vert_size); + allocateTangents(src.mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mTangents, (F32*) src.mTangents, vert_size); } else { - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free_16(mTangents); + mTangents = NULL; } if (src.mWeights) @@ -5585,6 +4653,8 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size); } + mOptimized = src.mOptimized; + //delete return *this; } @@ -5599,16 +4669,17 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + + //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately mNormals = NULL; - ll_aligned_free_16(mTexCoords); mTexCoords = NULL; + ll_aligned_free_16(mIndices); mIndices = NULL; - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free_16(mTangents); + mTangents = NULL; ll_aligned_free_16(mWeights); mWeights = NULL; @@ -5622,52 +4693,23 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) delete mOctree; mOctree = NULL; + LL_CHECK_MEMORY BOOL 
ret = FALSE ; if (mTypeMask & CAP_MASK) { ret = createCap(volume, partial_build); + LL_CHECK_MEMORY } else if ((mTypeMask & END_MASK) || (mTypeMask & SIDE_MASK)) { ret = createSide(volume, partial_build); + LL_CHECK_MEMORY } else { llerrs << "Unknown/uninitialized face type!" << llendl; } - //update the range of the texture coordinates - if(ret) - { - mTexCoordExtents[0].setVec(1.f, 1.f) ; - mTexCoordExtents[1].setVec(0.f, 0.f) ; - - for(U32 i = 0 ; i < mNumVertices ; i++) - { - if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0]) - { - mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ; - } - if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0]) - { - mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ; - } - - if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1]) - { - mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ; - } - if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1]) - { - mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ; - } - } - mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ; - mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ; - mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ; - mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ; - } - return ret ; } @@ -5783,22 +4825,29 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); if (angle_cutoff > 1.f && !mNormals) { - ll_aligned_free_16(new_face.mNormals); + // Now alloc'd with positions + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!mTexCoords) { - ll_aligned_free_16(new_face.mTexCoords); + // Now alloc'd with positions + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } - swapData(new_face); + // Only swap data if we've actually optimized the mesh + // + if (new_face.mNumVertices <= mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } + } class 
LLVCacheTriangleData; @@ -5808,14 +4857,14 @@ class LLVCacheVertexData public: S32 mIdx; S32 mCacheTag; - F32 mScore; + F64 mScore; U32 mActiveTriangles; std::vector<LLVCacheTriangleData*> mTriangles; LLVCacheVertexData() { mCacheTag = -1; - mScore = 0.f; + mScore = 0.0; mActiveTriangles = 0; mIdx = -1; } @@ -5825,13 +4874,13 @@ class LLVCacheTriangleData { public: bool mActive; - F32 mScore; + F64 mScore; LLVCacheVertexData* mVertex[3]; LLVCacheTriangleData() { mActive = true; - mScore = 0.f; + mScore = 0.0; mVertex[0] = mVertex[1] = mVertex[2] = NULL; } @@ -5842,7 +4891,7 @@ public: { if (mVertex[i]) { - llassert_always(mVertex[i]->mActiveTriangles > 0); + llassert(mVertex[i]->mActiveTriangles > 0); mVertex[i]->mActiveTriangles--; } } @@ -5854,20 +4903,20 @@ public: } }; -const F32 FindVertexScore_CacheDecayPower = 1.5f; -const F32 FindVertexScore_LastTriScore = 0.75f; -const F32 FindVertexScore_ValenceBoostScale = 2.0f; -const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const F64 FindVertexScore_CacheDecayPower = 1.5; +const F64 FindVertexScore_LastTriScore = 0.75; +const F64 FindVertexScore_ValenceBoostScale = 2.0; +const F64 FindVertexScore_ValenceBoostPower = 0.5; const U32 MaxSizeVertexCache = 32; +const F64 FindVertexScore_Scaler = 1.0/(MaxSizeVertexCache-3); -F32 find_vertex_score(LLVCacheVertexData& data) +F64 find_vertex_score(LLVCacheVertexData& data) { - if (data.mActiveTriangles == 0) - { //no triangle references this vertex - return -1.f; - } + F64 score = -1.0; - F32 score = 0.f; + if (data.mActiveTriangles >= 0) + { + score = 0.0; S32 cache_idx = data.mCacheTag; @@ -5883,15 +4932,15 @@ F32 find_vertex_score(LLVCacheVertexData& data) } else { //more points for being higher in the cache - F32 scaler = 1.f/(MaxSizeVertexCache-3); - score = 1.f-((cache_idx-3)*scaler); - score = powf(score, FindVertexScore_CacheDecayPower); + score = 1.0-((cache_idx-3)*FindVertexScore_Scaler); + score = pow(score, FindVertexScore_CacheDecayPower); } } //bonus points 
for having low valence - F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + F64 valence_boost = pow((F64)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); score += FindVertexScore_ValenceBoostScale * valence_boost; + } return score; } @@ -5998,32 +5047,44 @@ public: void updateScores() { - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) - { //trailing 3 vertices aren't actually in the cache for scoring purposes - if (mCache[i]) + LLVCacheVertexData** data_iter = mCache+MaxSizeVertexCache; + LLVCacheVertexData** end_data = mCache+MaxSizeVertexCache+3; + + while(data_iter != end_data) + { + LLVCacheVertexData* data = *data_iter++; + //trailing 3 vertices aren't actually in the cache for scoring purposes + if (data) { - mCache[i]->mCacheTag = -1; + data->mCacheTag = -1; } } - for (U32 i = 0; i < MaxSizeVertexCache; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache; + + while (data_iter != end_data) { //update scores of vertices in cache - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert_always(mCache[i]->mCacheTag == i); + data->mScore = find_vertex_score(*data); } } mBestTriangle = NULL; //update triangle scores - for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache+3; + + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) + for (std::vector<LLVCacheTriangleData*>::iterator iter = data->mTriangles.begin(), end_iter = data->mTriangles.end(); iter != end_iter; ++iter) { - LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; + LLVCacheTriangleData* tri = *iter; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -6040,13 +5101,17 @@ public: } //knock trailing 3 vertices off the cache - for (U32 i = MaxSizeVertexCache; i < 
MaxSizeVertexCache+3; ++i) + data_iter = mCache+MaxSizeVertexCache; + end_data = mCache+MaxSizeVertexCache+3; + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter; + if (data) { - llassert_always(mCache[i]->mCacheTag == -1); - mCache[i] = NULL; + llassert(data->mCacheTag == -1); + *data_iter = NULL; } + ++data_iter; } } }; @@ -6056,6 +5121,9 @@ void LLVolumeFace::cacheOptimize() { //optimize for vertex cache according to Forsyth method: // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html + llassert(!mOptimized); + mOptimized = TRUE; + LLVCacheLRU cache; if (mNumVertices < 3) @@ -6101,12 +5169,14 @@ void LLVolumeFace::cacheOptimize() for (U32 i = 0; i < mNumVertices; i++) { //initialize score values (no cache -- might try a fifo cache here) - vertex_data[i].mScore = find_vertex_score(vertex_data[i]); - vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + LLVCacheVertexData& data = vertex_data[i]; + + data.mScore = find_vertex_score(data); + data.mActiveTriangles = data.mTriangles.size(); - for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) + for (U32 j = 0; j < data.mActiveTriangles; ++j) { - vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; + data.mTriangles[j]->mScore += data.mScore; } } @@ -6176,10 +5246,10 @@ void LLVolumeFace::cacheOptimize() //allocate space for new buffer S32 num_verts = mNumVertices; - LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + LLVector4a* pos = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + LLVector4a* norm = pos + num_verts; + LLVector2* tc = (LLVector2*) (norm + num_verts); LLVector4a* wght = NULL; if (mWeights) @@ -6188,7 +5258,7 @@ void LLVolumeFace::cacheOptimize() } 
LLVector4a* binorm = NULL; - if (mBinormals) + if (mTangents) { binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } @@ -6213,9 +5283,9 @@ void LLVolumeFace::cacheOptimize() { wght[cur_idx] = mWeights[idx]; } - if (mBinormals) + if (mTangents) { - binorm[cur_idx] = mBinormals[idx]; + binorm[cur_idx] = mTangents[idx]; } cur_idx++; @@ -6227,17 +5297,16 @@ void LLVolumeFace::cacheOptimize() mIndices[i] = new_idx[mIndices[i]]; } - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mWeights); - ll_aligned_free_16(mBinormals); + ll_aligned_free_16(mTangents); mPositions = pos; mNormals = norm; mTexCoords = tc; mWeights = wght; - mBinormals = binorm; + mTangents = binorm; //std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks); //llinfos << result << llendl; @@ -6318,7 +5387,7 @@ void LLVolumeFace::swapData(LLVolumeFace& rhs) { llswap(rhs.mPositions, mPositions); llswap(rhs.mNormals, mNormals); - llswap(rhs.mBinormals, mBinormals); + llswap(rhs.mTangents, mTangents); llswap(rhs.mTexCoords, mTexCoords); llswap(rhs.mIndices,mIndices); llswap(rhs.mNumVertices, mNumVertices); @@ -6351,20 +5420,15 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - - const std::vector<LLVolume::Point>& mesh = volume->getMesh(); - const std::vector<LLVector3>& profile = volume->getProfile().mProfile; + LL_CHECK_MEMORY + + const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh(); + const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile; S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); // S32 i; - S32 num_vertices = 0, num_indices = 0; S32 
grid_size = (profile.size()-1)/4; - S32 quad_count = (grid_size * grid_size); - - num_vertices = (grid_size+1)*(grid_size+1); - num_indices = quad_count * 4; LLVector4a& min = mExtents[0]; LLVector4a& max = mExtents[1]; @@ -6384,9 +5448,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) VertexData baseVert; for(S32 t = 0; t < 4; t++) { - corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); - corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; - corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; + corners[t].getPosition().load4a(mesh[offset + (grid_size*t)].getF32ptr()); + corners[t].mTexCoord.mV[0] = profile[grid_size*t][0]+0.5f; + corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t][1]; } { @@ -6414,22 +5478,11 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) corners[2].mTexCoord=swap; } - LLVector4a binormal; - - calc_binormal_from_triangle( binormal, - corners[0].getPosition(), corners[0].mTexCoord, - corners[1].getPosition(), corners[1].mTexCoord, - corners[2].getPosition(), corners[2].mTexCoord); - - binormal.normalize3fast(); - S32 size = (grid_size+1)*(grid_size+1); resizeVertices(size); - allocateBinormals(size); - + LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; - LLVector4a* binorm = (LLVector4a*) mBinormals; LLVector2* tc = (LLVector2*) mTexCoords; for(int gx = 0;gx<grid_size+1;gx++) @@ -6448,8 +5501,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) *pos++ = newVert.getPosition(); *norm++ = baseVert.getNormal(); *tc++ = newVert.mTexCoord; - *binorm++ = binormal; - + if (gx == 0 && gy == 0) { min = newVert.getPosition(); @@ -6497,14 +5549,13 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY return TRUE; } BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - if (!(mTypeMask & 
HOLLOW_MASK) && !(mTypeMask & OPEN_MASK) && ((volume->getParams().getPathParams().getBegin()==0.0f)&& @@ -6517,8 +5568,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) S32 num_vertices = 0, num_indices = 0; - const std::vector<LLVolume::Point>& mesh = volume->getMesh(); - const std::vector<LLVector3>& profile = volume->getProfile().mProfile; + const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh(); + const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile; // All types of caps have the same number of vertices and indices num_vertices = profile.size(); @@ -6527,8 +5578,7 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { resizeVertices(num_vertices+1); - allocateBinormals(num_vertices+1); - + if (!partial_build) { resizeIndices(num_indices+3); @@ -6537,14 +5587,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) else { resizeVertices(num_vertices); - allocateBinormals(num_vertices); - if (!partial_build) { resizeIndices(num_indices); } } + LL_CHECK_MEMORY; + S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6572,67 +5622,79 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector2* tc = (LLVector2*) mTexCoords; LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; - LLVector4a* binorm = (LLVector4a*) mBinormals; - + // Copy the vertices into the array - for (S32 i = 0; i < num_vertices; i++) + + const LLVector4a* src = mesh.mArray+offset; + const LLVector4a* end = src+num_vertices; + + min = *src; + max = min; + + + const LLVector4a* p = profile.mArray; + + if (mTypeMask & TOP_MASK) { - if (mTypeMask & TOP_MASK) + min_uv.set((*p)[0]+0.5f, + (*p)[1]+0.5f); + + max_uv = min_uv; + + while(src < end) { - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = profile[i].mV[1]+0.5f; + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = 
(*p)[1]+0.5f; + + llassert(src->isFinite3()); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); + + *pos = *src; + + llassert(pos->isFinite3()); + + ++p; + ++tc; + ++src; + ++pos; + } } else { + + min_uv.set((*p)[0]+0.5f, + 0.5f - (*p)[1]); + max_uv = min_uv; + + while(src < end) + { // Mirror for underside. - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = 0.5f - profile[i].mV[1]; - } + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = 0.5f - (*p)[1]; + + llassert(src->isFinite3()); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); - pos[i].load3(mesh[i + offset].mPos.mV); + *pos = *src; - if (i == 0) - { - max = pos[i]; - min = max; - min_uv = max_uv = tc[i]; - } - else - { - update_min_max(min,max,pos[i]); - update_min_max(min_uv, max_uv, tc[i]); + llassert(pos->isFinite3()); + + ++p; + ++tc; + ++src; + ++pos; } } + LL_CHECK_MEMORY + mCenter->setAdd(min, max); mCenter->mul(0.5f); cuv = (min_uv + max_uv)*0.5f; - LLVector4a binormal; - calc_binormal_from_triangle(binormal, - *mCenter, cuv, - pos[0], tc[0], - pos[1], tc[1]); - binormal.normalize3fast(); - - LLVector4a normal; - LLVector4a d0, d1; - - - d0.setSub(*mCenter, pos[0]); - d1.setSub(*mCenter, pos[1]); - - if (mTypeMask & TOP_MASK) - { - normal.setCross3(d0, d1); - } - else - { - normal.setCross3(d1, d0); - } - - normal.normalize3fast(); VertexData vd; vd.setPosition(*mCenter); @@ -6640,17 +5702,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { - pos[num_vertices] = *mCenter; - tc[num_vertices] = cuv; + *pos++ = *mCenter; + *tc++ = cuv; num_vertices++; } - for (S32 i = 0; i < num_vertices; i++) - { - binorm[i].load4a(binormal.getF32ptr()); - norm[i].load4a(normal.getF32ptr()); - } - + LL_CHECK_MEMORY + if (partial_build) { return TRUE; @@ -6669,33 +5727,38 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you 
want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; + //p1.mV[VZ] = 0.f; + //p2.mV[VZ] = 0.f; + //pa.mV[VZ] = 0.f; + //pb.mV[VZ] = 0.f; // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6730,10 +5793,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1, 
pa); + + LLVector4a d2; + d2.setSub(p2, pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6772,33 +5838,33 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = 
TRUE; BOOL tri_1a2 = TRUE; @@ -6833,10 +5899,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1,pa); + LLVector4a d2; + d2.setSub(p2,pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6885,65 +5953,70 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } + + LLVector4a d0,d1; + LL_CHECK_MEMORY + + d0.setSub(mPositions[mIndices[1]], mPositions[mIndices[0]]); + d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); + + LLVector4a normal; + normal.setCross3(d0,d1); + + if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO) + { + normal.normalize3fast(); + } + else + { //degenerate, make up a value + normal.set(0,0,1); + } + + llassert(llfinite(normal.getF32ptr()[0])); + llassert(llfinite(normal.getF32ptr()[1])); + llassert(llfinite(normal.getF32ptr()[2])); + + llassert(!llisnan(normal.getF32ptr()[0])); + llassert(!llisnan(normal.getF32ptr()[1])); + llassert(!llisnan(normal.getF32ptr()[2])); + + for (S32 i = 0; i < num_vertices; i++) + { + norm[i].load4a(normal.getF32ptr()); + } + return TRUE; } -void LLVolumeFace::createBinormals() +void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, + const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent); + +void LLVolumeFace::createTangents() { - LLMemType m1(LLMemType::MTYPE_VOLUME); - - if (!mBinormals) + if (!mTangents) { - allocateBinormals(mNumVertices); + allocateTangents(mNumVertices); - //generate binormals - LLVector4a* pos = mPositions; - LLVector2* tc = (LLVector2*) mTexCoords; - LLVector4a* binorm = (LLVector4a*) mBinormals; + //generate tangents + //LLVector4a* pos = mPositions; + //LLVector2* tc = (LLVector2*) mTexCoords; + LLVector4a* binorm = (LLVector4a*) mTangents; - LLVector4a* end = mBinormals+mNumVertices; + LLVector4a* end = 
mTangents+mNumVertices; while (binorm < end) { (*binorm++).clear(); } - binorm = mBinormals; + binorm = mTangents; - for (U32 i = 0; i < mNumIndices/3; i++) - { //for each triangle - const U16& i0 = mIndices[i*3+0]; - const U16& i1 = mIndices[i*3+1]; - const U16& i2 = mIndices[i*3+2]; - - //calculate binormal - LLVector4a binormal; - calc_binormal_from_triangle(binormal, - pos[i0], tc[i0], - pos[i1], tc[i1], - pos[i2], tc[i2]); - - - //add triangle normal to vertices - binorm[i0].add(binormal); - binorm[i1].add(binormal); - binorm[i2].add(binormal); - - //even out quad contributions - if (i % 2 == 0) - { - binorm[i2].add(binormal); - } - else - { - binorm[i1].add(binormal); - } - } + CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); - //normalize binormals + //normalize tangents for (U32 i = 0; i < mNumVertices; i++) { - binorm[i].normalize3fast(); + //binorm[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized mNormals[i].normalize3fast(); } @@ -6952,24 +6025,22 @@ void LLVolumeFace::createBinormals() void LLVolumeFace::resizeVertices(S32 num_verts) { - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mBinormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer + ll_aligned_free_16(mTangents); - mBinormals = NULL; + mTangents = NULL; if (num_verts) { - mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mNormals, 16); - //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - assert_aligned(mTexCoords, 16); + + mPositions = (LLVector4a*) 
ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + mNormals = mPositions+num_verts; + mTexCoords = (LLVector2*) (mNormals+num_verts); + + ll_assert_aligned(mPositions, 64); } else { @@ -6979,6 +6050,7 @@ void LLVolumeFace::resizeVertices(S32 num_verts) } mNumVertices = num_verts; + mNumAllocatedVertices = num_verts; } void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) @@ -6989,23 +6061,42 @@ void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) { S32 new_verts = mNumVertices+1; - S32 new_size = new_verts*16; -// S32 old_size = mNumVertices*16; + + if (new_verts > mNumAllocatedVertices) + { + //double buffer size on expansion + new_verts *= 2; + + S32 new_tc_size = ((new_verts*8)+0xF) & ~0xF; + S32 old_tc_size = ((mNumVertices*8)+0xF) & ~0xF; + + S32 old_vsize = mNumVertices*16; + + S32 new_size = new_verts*16*2+new_tc_size; + + LLVector4a* old_buf = mPositions; + + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions+new_verts; + mTexCoords = (LLVector2*) (mNormals+new_verts); //positions - mPositions = (LLVector4a*) realloc(mPositions, new_size); + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); //normals - mNormals = (LLVector4a*) realloc(mNormals, new_size); - + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); + //tex coords - new_size = ((new_verts*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); - + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tc_size); - //just clear binormals - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + //just clear tangents + ll_aligned_free_16(mTangents); + mTangents = NULL; + ll_aligned_free(old_buf); + + mNumAllocatedVertices = new_verts; + + } mPositions[mNumVertices] = pos; mNormals[mNumVertices] = norm; @@ -7014,10 
+6105,10 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con mNumVertices++; } -void LLVolumeFace::allocateBinormals(S32 num_verts) +void LLVolumeFace::allocateTangents(S32 num_verts) { - ll_aligned_free_16(mBinormals); - mBinormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + ll_aligned_free_16(mTangents); + mTangents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } void LLVolumeFace::allocateWeights(S32 num_verts) @@ -7053,7 +6144,8 @@ void LLVolumeFace::pushIndex(const U16& idx) S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; if (new_size != old_size) { - mIndices = (U16*) realloc(mIndices, new_size); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size, old_size); + ll_assert_aligned(mIndices,16); } mIndices[mNumIndices++] = idx; @@ -7093,13 +6185,23 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat llerrs << "Cannot append empty face." << llendl; } + U32 old_vsize = mNumVertices*16; + U32 new_vsize = new_count * 16; + U32 old_tcsize = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_tcsize = (new_count*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_size = new_vsize * 2 + new_tcsize; + //allocate new buffer space - mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); - assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); - assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); - assert_aligned(mTexCoords, 16); + LLVector4a* old_buf = mPositions; + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions + new_count; + mTexCoords = (LLVector2*) (mNormals+new_count); + + mNumAllocatedVertices = new_count; + + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); + 
LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tcsize); mNumVertices = new_count; @@ -7145,7 +6247,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat new_count = mNumIndices + face.mNumIndices; //allocate new index buffer - mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF, (mNumIndices*sizeof(U16)+0xF) & ~0xF); //get destination address into new index buffer U16* dst_idx = mIndices+mNumIndices; @@ -7159,8 +6261,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { - LLMemType m1(LLMemType::MTYPE_VOLUME); - + LL_CHECK_MEMORY BOOL flat = mTypeMask & FLAT_MASK; U8 sculpt_type = volume->getParams().getSculptType(); @@ -7171,9 +6272,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) S32 num_vertices, num_indices; - const std::vector<LLVolume::Point>& mesh = volume->getMesh(); - const std::vector<LLVector3>& profile = volume->getProfile().mProfile; - const std::vector<LLPath::PathPt>& path_data = volume->getPath().mPath; + const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh(); + const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile; + const LLAlignedArray<LLPath::PathPt,64>& path_data = volume->getPath().mPath; S32 max_s = volume->getProfile().getTotal(); @@ -7194,15 +6295,19 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + LLVector4a* pos = (LLVector4a*) mPositions; - LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - S32 begin_stex = llfloor( profile[mBeginS].mV[2] ); + F32 begin_stex = floorf(profile[mBeginS][2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? 
mNumS/2 : mNumS; S32 cur_vertex = 0; + S32 end_t = mBeginT+mNumT; + bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2; + // Copy the vertices into the array - for (t = mBeginT; t < mBeginT + mNumT; t++) + for (t = mBeginT; t < end_t; t++) { tt = path_data[t].mTexT; for (s = 0; s < num_s; s++) @@ -7223,11 +6328,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // Get s value for tex-coord. if (!flat) { - ss = profile[mBeginS + s].mV[2]; + ss = profile[mBeginS + s][2]; } else { - ss = profile[mBeginS + s].mV[2] - begin_stex; + ss = profile[mBeginS + s][2] - begin_stex; } } @@ -7247,20 +6352,15 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) i = mBeginS + s + max_s*t; } - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); - norm[cur_vertex].clear(); cur_vertex++; - if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) + if (test && s > 0) { - - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - - norm[cur_vertex].clear(); - + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); cur_vertex++; } } @@ -7277,28 +6377,63 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } i = mBeginS + s + max_s*t; - ss = profile[mBeginS + s].mV[2] - begin_stex; - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - norm[cur_vertex].clear(); + ss = profile[mBeginS + s][2] - begin_stex; + + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); cur_vertex++; } - } + } + LL_CHECK_MEMORY + mCenter->clear(); + + LLVector4a* cur_pos = pos; + LLVector4a* end_pos = pos + mNumVertices; //get bounding box for this side - LLVector4a& face_min = mExtents[0]; - LLVector4a& face_max = mExtents[1]; - mCenter->clear(); + LLVector4a face_min; + LLVector4a face_max; + + face_min = face_max = *cur_pos++; + + 
while (cur_pos < end_pos) + { + update_min_max(face_min, face_max, *cur_pos++); + } + + mExtents[0] = face_min; + mExtents[1] = face_max; + + U32 tc_count = mNumVertices; + if (tc_count%2 == 1) + { //odd number of texture coordinates, duplicate last entry to padded end of array + tc_count++; + mTexCoords[mNumVertices] = mTexCoords[mNumVertices-1]; + } - face_min = face_max = pos[0]; + LLVector4a* cur_tc = (LLVector4a*) mTexCoords; + LLVector4a* end_tc = (LLVector4a*) (mTexCoords+tc_count); - for (U32 i = 1; i < mNumVertices; ++i) + LLVector4a tc_min; + LLVector4a tc_max; + + tc_min = tc_max = *cur_tc++; + + while (cur_tc < end_tc) { - update_min_max(face_min, face_max, pos[i]); + update_min_max(tc_min, tc_max, *cur_tc++); } + F32* minp = tc_min.getF32ptr(); + F32* maxp = tc_max.getF32ptr(); + + mTexCoordExtents[0].mV[0] = llmin(minp[0], minp[2]); + mTexCoordExtents[0].mV[1] = llmin(minp[1], minp[3]); + mTexCoordExtents[1].mV[0] = llmax(maxp[0], maxp[2]); + mTexCoordExtents[1].mV[1] = llmax(maxp[1], maxp[3]); + mCenter->setAdd(face_min, face_max); mCenter->mul(0.5f); @@ -7363,39 +6498,119 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + //clear normals - for (U32 i = 0; i < mNumVertices; i++) + F32* dst = (F32*) mNormals; + F32* end = (F32*) (mNormals+mNumVertices); + LLVector4a zero = LLVector4a::getZero(); + + while (dst < end) { - mNormals[i].clear(); + zero.store4a(dst); + dst += 4; } + LL_CHECK_MEMORY + //generate normals - for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle + U32 count = mNumIndices/3; + + LLVector4a* norm = mNormals; + + static LLAlignedArray<LLVector4a, 64> triangle_normals; + triangle_normals.resize(count); + LLVector4a* output = triangle_normals.mArray; + LLVector4a* end_output = output+count; + + U16* idx = mIndices; + + while (output < end_output) { - const U16* idx = &(mIndices[i*3]); + LLVector4a b,v1,v2; + b.load4a((F32*) (pos+idx[0])); + v1.load4a((F32*) (pos+idx[1])); + 
v2.load4a((F32*) (pos+idx[2])); + //calculate triangle normal + LLVector4a a; + + a.setSub(b, v1); + b.sub(v2); + - LLVector4a* v[] = - { pos+idx[0], pos+idx[1], pos+idx[2] }; + LLQuad& vector1 = *((LLQuad*) &v1); + LLQuad& vector2 = *((LLQuad*) &v2); - LLVector4a* n[] = - { norm+idx[0], norm+idx[1], norm+idx[2] }; + LLQuad& amQ = *((LLQuad*) &a); + LLQuad& bmQ = *((LLQuad*) &b); + + //v1.setCross3(t,v0); + //setCross3(const LLVector4a& a, const LLVector4a& b) + // Vectors are stored in memory in w, z, y, x order from high to low + // Set vector1 = { a[W], a[X], a[Z], a[Y] } + vector1 = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // Set vector2 = { b[W], b[Y], b[X], b[Z] } + vector2 = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] } + vector2 = _mm_mul_ps( vector1, vector2 ); + // vector3 = { a[W], a[Y], a[X], a[Z] } + amQ = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // vector4 = { b[W], b[X], b[Z], b[Y] } + bmQ = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } + vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ )); + + llassert(v1.isFinite3()); + + v1.store4a((F32*) output); - //calculate triangle normal - LLVector4a a, b, c; - a.setSub(*v[0], *v[1]); - b.setSub(*v[0], *v[2]); - c.setCross3(a,b); + output++; + idx += 3; + } + + idx = mIndices; + + LLVector4a* src = triangle_normals.mArray; + + for (U32 i = 0; i < count; i++) //for each triangle + { + LLVector4a c; + c.load4a((F32*) (src++)); + + LLVector4a* n0p = norm+idx[0]; + LLVector4a* n1p = norm+idx[1]; + LLVector4a* n2p = norm+idx[2]; + + idx += 3; - n[0]->add(c); - n[1]->add(c); - n[2]->add(c); + LLVector4a n0,n1,n2; + n0.load4a((F32*) n0p); + n1.load4a((F32*) n1p); + n2.load4a((F32*) n2p); + n0.add(c); + n1.add(c); + n2.add(c); + + llassert(c.isFinite3()); + //even out quad contributions - n[i%2+1]->add(c); + switch (i%2+1) + { + 
case 0: n0.add(c); break; + case 1: n1.add(c); break; + case 2: n2.add(c); break; + }; + + n0.store4a((F32*) n0p); + n1.store4a((F32*) n1p); + n2.store4a((F32*) n2p); } + LL_CHECK_MEMORY + // adjust normals based on wrapping and stitching LLVector4a top; @@ -7527,56 +6742,107 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } + LL_CHECK_MEMORY + return TRUE; } -// Finds binormal based on three vertices with texture coordinates. -// Fills in dummy values if the triangle has degenerate texture coordinates. -void calc_binormal_from_triangle(LLVector4a& binormal, - - const LLVector4a& pos0, - const LLVector2& tex0, - const LLVector4a& pos1, - const LLVector2& tex1, - const LLVector4a& pos2, - const LLVector2& tex2) -{ - LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] ); - LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] ); - LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] ); - - LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] ); - LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] ); - LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] ); - - LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] ); - LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] ); - LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] ); - - LLVector4a lhs, rhs; - - LLVector4a r0; - lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2); - r0.setCross3(lhs, rhs); +//adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. 
http://www.terathon.com/code/tangent.html +void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, + const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) +{ + //LLVector4a *tan1 = new LLVector4a[vertexCount * 2]; + LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a)); + + LLVector4a* tan2 = tan1 + vertexCount; + + memset(tan1, 0, vertexCount*2*sizeof(LLVector4a)); + + for (U32 a = 0; a < triangleCount; a++) + { + U32 i1 = *index_array++; + U32 i2 = *index_array++; + U32 i3 = *index_array++; + + const LLVector4a& v1 = vertex[i1]; + const LLVector4a& v2 = vertex[i2]; + const LLVector4a& v3 = vertex[i3]; + + const LLVector2& w1 = texcoord[i1]; + const LLVector2& w2 = texcoord[i2]; + const LLVector2& w3 = texcoord[i3]; + + const F32* v1ptr = v1.getF32ptr(); + const F32* v2ptr = v2.getF32ptr(); + const F32* v3ptr = v3.getF32ptr(); - LLVector4a r1; - lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2); - r1.setCross3(lhs, rhs); - - LLVector4a r2; - lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2); - r2.setCross3(lhs, rhs); + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; + + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; + + F32 rd = s1*t2-s2*t1; + + float r = ((rd*rd) > FLT_EPSILON) ? (1.0f / rd) + : ((rd > 0.0f) ? 
1024.f : -1024.f); //some made up large ratio for division by zero + + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); + + tan1[i1].add(sdir); + tan1[i2].add(sdir); + tan1[i3].add(sdir); + + tan2[i1].add(tdir); + tan2[i2].add(tdir); + tan2[i3].add(tdir); + } + + for (U32 a = 0; a < vertexCount; a++) + { + LLVector4a n = normal[a]; + + const LLVector4a& t = tan1[a]; + + LLVector4a ncrosst; + ncrosst.setCross3(n,t); + + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); + + LLVector4a tsubn; + tsubn.setSub(t,n); + + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast(); + + // Calculate handedness + F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; + + tsubn.getF32ptr()[3] = handedness; - if( r0[VX] && r1[VX] && r2[VX] ) - { - binormal.set( - -r0[VZ] / r0[VX], - -r1[VZ] / r1[VX], - -r2[VZ] / r2[VX]); - // binormal.normVec(); - } - else - { - binormal.set( 0, 1 , 0 ); - } + tangent[a] = tsubn; + } + else + { //degenerate, make up a value + tangent[a].set(0,0,1,1); + } + } + + ll_aligned_free_16(tan1); } + + diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 76cf9de613..975227ea58 100644..100755 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -37,7 +37,6 @@ class LLPath; template <class T> class LLOctreeNode; -class LLVector4a; class LLVolumeFace; class LLVolume; class LLVolumeTriangle; @@ -50,11 +49,15 @@ class LLVolumeTriangle; #include "v3math.h" #include "v3dmath.h" #include "v4math.h" +#include "llvector4a.h" +#include "llmatrix4a.h" #include "llquaternion.h" #include "llstrider.h" #include "v4coloru.h" #include "llrefcount.h" +#include "llpointer.h" #include "llfile.h" +#include "llalignedarray.h" //============================================================================ @@ -707,16 
+710,16 @@ public: LLFaceID mFaceID; }; - std::vector<LLVector3> mProfile; - std::vector<LLVector2> mNormals; + LLAlignedArray<LLVector4a, 64> mProfile; + //LLAlignedArray<LLVector4a, 64> mNormals; std::vector<Face> mFaces; - std::vector<LLVector3> mEdgeNormals; - std::vector<LLVector3> mEdgeCenters; + + //LLAlignedArray<LLVector4a, 64> mEdgeNormals; + //LLAlignedArray<LLVector4a, 64> mEdgeCenters; friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile); protected: - void genNormals(const LLProfileParams& params); static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); @@ -740,13 +743,29 @@ protected: class LLPath { public: - struct PathPt + class PathPt { - LLVector3 mPos; - LLVector2 mScale; - LLQuaternion mRot; + public: + LLMatrix4a mRot; + LLVector4a mPos; + + LLVector4a mScale; F32 mTexT; - PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); } + F32 pad[3]; //for alignment + PathPt() + { + mPos.clear(); + mTexT = 0; + mScale.clear(); + mRot.setRows(LLVector4a(1,0,0,0), + LLVector4a(0,1,0,0), + LLVector4a(0,0,1,0)); + + //distinguished data in the pad for debugging + pad[0] = 3.14159f; + pad[1] = -3.14159f; + pad[2] = 0.585f; + } }; public: @@ -778,7 +797,7 @@ public: friend std::ostream& operator<<(std::ostream &s, const LLPath &path); public: - std::vector<PathPt> mPath; + LLAlignedArray<PathPt, 64> mPath; protected: BOOL mOpen; @@ -843,12 +862,12 @@ private: public: BOOL create(LLVolume* volume, BOOL partial_build = FALSE); - void createBinormals(); + void createTangents(); void appendFace(const LLVolumeFace& face, LLMatrix4& transform, LLMatrix4& normal_tranform); void resizeVertices(S32 num_verts); - void allocateBinormals(S32 num_verts); + void allocateTangents(S32 num_verts); void 
allocateWeights(S32 num_verts); void resizeIndices(S32 num_indices); void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx); @@ -911,14 +930,19 @@ public: LLVector2 mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face. S32 mNumVertices; + S32 mNumAllocatedVertices; S32 mNumIndices; LLVector4a* mPositions; LLVector4a* mNormals; - LLVector4a* mBinormals; + LLVector4a* mTangents; LLVector2* mTexCoords; U16* mIndices; + //vertex buffer filled in by LLFace to cache this volume face geometry in vram + // (declared as a LLPointer to LLRefCount to avoid dependency on LLVertexBuffer) + mutable LLPointer<LLRefCount> mVertexBuffer; + std::vector<S32> mEdge; //list of skin weights for rigged volumes @@ -928,6 +952,9 @@ public: LLOctreeNode<LLVolumeTriangle>* mOctree; + //whether or not face has been cache optimized + BOOL mOptimized; + private: BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE); BOOL createCap(LLVolume* volume, BOOL partial_build = FALSE); @@ -942,11 +969,7 @@ protected: ~LLVolume(); // use unref public: - struct Point - { - LLVector3 mPos; - }; - + struct FaceParams { LLFaceID mFaceID; @@ -969,13 +992,13 @@ public: const LLProfile& getProfile() const { return *mProfilep; } LLPath& getPath() const { return *mPathp; } void resizePath(S32 length); - const std::vector<Point>& getMesh() const { return mMesh; } - const LLVector3& getMeshPt(const U32 i) const { return mMesh[i].mPos; } + const LLAlignedArray<LLVector4a,64>& getMesh() const { return mMesh; } + const LLVector4a& getMeshPt(const U32 i) const { return mMesh[i]; } void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); } void regen(); - void genBinormals(S32 face); + void genTangents(S32 face); BOOL isConvex() const; BOOL isCap(S32 face); @@ -985,10 +1008,7 @@ public: S32 getSculptLevel() const { return mSculptLevel; } void setSculptLevel(S32 level) { mSculptLevel = level; } - S32 *getTriangleIndices(U32 &num_indices) 
const; - - // returns number of triangle indeces required for path/profile mesh - S32 getNumTriangleIndices() const; + static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts); S32 getNumTriangles(S32* vcount = NULL) const; @@ -1003,32 +1023,14 @@ public: //get the face index of the face that intersects with the given line segment at the point //closest to start. Moves end to the point of intersection. Returns -1 if no intersection. //Line segment must be in volume space. - S32 lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); - S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, - S32 face = 1, - LLVector3* intersection = NULL, - LLVector2* tex_coord = NULL, - LLVector3* normal = NULL, - LLVector3* bi_normal = NULL); - - // The following cleans up vertices and triangles, - // getting rid of degenerate triangles and duplicate vertices, - // and allocates new arrays with the clean data. 
- static BOOL cleanupTriangleData( const S32 num_input_vertices, - const std::vector<Point> &input_vertices, - const S32 num_input_triangles, - S32 *input_triangles, - S32 &num_output_vertices, - LLVector3 **output_vertices, - S32 &num_output_triangles, - S32 **output_triangles); LLFaceID generateFaceMask(); BOOL isFaceMaskValid(LLFaceID face_mask); @@ -1072,7 +1074,8 @@ public: LLVolumeParams mParams; LLPath *mPathp; LLProfile *mProfilep; - std::vector<Point> mMesh; + LLAlignedArray<LLVector4a,64> mMesh; + BOOL mGenerateSingleFace; typedef std::vector<LLVolumeFace> face_list_t; @@ -1087,21 +1090,12 @@ public: std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params); -void calc_binormal_from_triangle( - LLVector4a& binormal, - const LLVector4a& pos0, - const LLVector2& tex0, - const LLVector4a& pos1, - const LLVector2& tex1, - const LLVector4a& pos2, - const LLVector2& tex2); - BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size); BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size); BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size); -BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, - F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); +//BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, +// F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, F32& intersection_a, F32& intersection_b, F32& intersection_t); diff --git a/indra/llmath/llvolumemgr.cpp 
b/indra/llmath/llvolumemgr.cpp index c60b750088..9083273ee5 100644..100755 --- a/indra/llmath/llvolumemgr.cpp +++ b/indra/llmath/llvolumemgr.cpp @@ -26,7 +26,6 @@ #include "linden_common.h" #include "llvolumemgr.h" -#include "llmemtype.h" #include "llvolume.h" @@ -182,7 +181,6 @@ void LLVolumeMgr::insertGroup(LLVolumeLODGroup* volgroup) // protected LLVolumeLODGroup* LLVolumeMgr::createNewGroup(const LLVolumeParams& volume_params) { - LLMemType m1(LLMemType::MTYPE_VOLUME); LLVolumeLODGroup* volgroup = new LLVolumeLODGroup(volume_params); insertGroup(volgroup); return volgroup; @@ -297,7 +295,6 @@ LLVolume* LLVolumeLODGroup::refLOD(const S32 detail) mRefs++; if (mVolumeLODs[detail].isNull()) { - LLMemType m1(LLMemType::MTYPE_VOLUME); mVolumeLODs[detail] = new LLVolume(mVolumeParams, mDetailScales[detail]); } mLODRefs[detail]++; diff --git a/indra/llmath/llvolumemgr.h b/indra/llmath/llvolumemgr.h index c75906f675..c75906f675 100644..100755 --- a/indra/llmath/llvolumemgr.h +++ b/indra/llmath/llvolumemgr.h diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp index b5a935c2b5..0728b49c1f 100644..100755 --- a/indra/llmath/llvolumeoctree.cpp +++ b/indra/llmath/llvolumeoctree.cpp @@ -94,14 +94,14 @@ void LLVolumeOctreeListener::handleChildAddition(const LLOctreeNode<LLVolumeTria LLOctreeTriangleRayIntersect::LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, const LLVolumeFace* face, F32* closest_t, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent) : mFace(face), mStart(start), mDir(dir), mIntersection(intersection), mTexCoord(tex_coord), mNormal(normal), - mBinormal(bi_normal), + mTangent(tangent), mClosestT(closest_t), mHitFace(false) { @@ -112,13 +112,7 @@ void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode<LLVolumeTriangle> { LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) 
node->getListener(0); - /*const F32* start = mStart.getF32(); - const F32* end = mEnd.getF32(); - const F32* center = vl->mBounds[0].getF32(); - const F32* size = vl->mBounds[1].getF32();*/ - - //if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) - if (LLLineSegmentBoxIntersect(mStart.getF32ptr(), mEnd.getF32ptr(), vl->mBounds[0].getF32ptr(), vl->mBounds[1].getF32ptr())) + if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) { node->accept(this); for (S32 i = 0; i < node->getChildCount(); ++i) @@ -131,7 +125,7 @@ void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode<LLVolumeTriangle> void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode<LLVolumeTriangle>* node) { for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = - node->getData().begin(); iter != node->getData().end(); ++iter) + node->getDataBegin(); iter != node->getDataEnd(); ++iter) { const LLVolumeTriangle* tri = *iter; @@ -152,34 +146,60 @@ void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode<LLVolumeTriangle>* n LLVector4a intersect = mDir; intersect.mul(*mClosestT); intersect.add(mStart); - mIntersection->set(intersect.getF32ptr()); + *mIntersection = intersect; } + U32 idx0 = tri->mIndex[0]; + U32 idx1 = tri->mIndex[1]; + U32 idx2 = tri->mIndex[2]; if (mTexCoord != NULL) { LLVector2* tc = (LLVector2*) mFace->mTexCoords; - *mTexCoord = ((1.f - a - b) * tc[tri->mIndex[0]] + - a * tc[tri->mIndex[1]] + - b * tc[tri->mIndex[2]]); + *mTexCoord = ((1.f - a - b) * tc[idx0] + + a * tc[idx1] + + b * tc[idx2]); } if (mNormal != NULL) { - LLVector4* norm = (LLVector4*) mFace->mNormals; - - *mNormal = ((1.f - a - b) * LLVector3(norm[tri->mIndex[0]]) + - a * LLVector3(norm[tri->mIndex[1]]) + - b * LLVector3(norm[tri->mIndex[2]])); + LLVector4a* norm = mFace->mNormals; + + LLVector4a n1,n2,n3; + n1 = norm[idx0]; + n1.mul(1.f-a-b); + + n2 = norm[idx1]; + n2.mul(a); + + n3 = norm[idx2]; + n3.mul(b); + + n1.add(n2); + n1.add(n3); + + 
*mNormal = n1; } - if (mBinormal != NULL) + if (mTangent != NULL) { - LLVector4* binormal = (LLVector4*) mFace->mBinormals; - *mBinormal = ((1.f - a - b) * LLVector3(binormal[tri->mIndex[0]]) + - a * LLVector3(binormal[tri->mIndex[1]]) + - b * LLVector3(binormal[tri->mIndex[2]])); + LLVector4a* tangents = mFace->mTangents; + + LLVector4a t1,t2,t3; + t1 = tangents[idx0]; + t1.mul(1.f-a-b); + + t2 = tangents[idx1]; + t2.mul(a); + + t3 = tangents[idx2]; + t3.mul(b); + + t1.add(t2); + t1.add(t3); + + *mTangent = t1; } } } @@ -236,8 +256,8 @@ void LLVolumeOctreeValidate::visit(const LLOctreeNode<LLVolumeTriangle>* branch) } //children fit, check data - for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getData().begin(); - iter != branch->getData().end(); ++iter) + for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getDataBegin(); + iter != branch->getDataEnd(); ++iter) { const LLVolumeTriangle* tri = *iter; diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 688d91dc40..80d6ced36d 100644..100755 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -37,9 +37,19 @@ class LLVolumeTriangle : public LLRefCount { public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeTriangle() { - + mBinIndex = -1; } LLVolumeTriangle(const LLVolumeTriangle& rhs) @@ -58,21 +68,38 @@ public: } - LLVector4a mPositionGroup; + LL_ALIGN_16(LLVector4a mPositionGroup); const LLVector4a* mV[3]; U16 mIndex[3]; F32 mRadius; + mutable S32 mBinIndex; + virtual const LLVector4a& getPositionGroup() const; virtual const F32& getBinRadius() const; + + S32 getBinIndex() const { return mBinIndex; } + void setBinIndex(S32 idx) const { mBinIndex = idx; } + + }; class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle> { public: + void* operator new(size_t size) + { + return 
ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + } + LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node); ~LLVolumeOctreeListener(); @@ -99,8 +126,8 @@ public: public: - LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a mExtents[2]; // extents (min, max) of this node and all its children + LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects) + LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children }; class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle> @@ -110,16 +137,16 @@ public: LLVector4a mStart; LLVector4a mDir; LLVector4a mEnd; - LLVector3* mIntersection; + LLVector4a* mIntersection; LLVector2* mTexCoord; - LLVector3* mNormal; - LLVector3* mBinormal; + LLVector4a* mNormal; + LLVector4a* mTangent; F32* mClosestT; bool mHitFace; LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, const LLVolumeFace* face, F32* closest_t, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal); + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent); void traverse(const LLOctreeNode<LLVolumeTriangle>* node); diff --git a/indra/llmath/m3math.cpp b/indra/llmath/m3math.cpp index 802ddb9e57..802ddb9e57 100644..100755 --- a/indra/llmath/m3math.cpp +++ b/indra/llmath/m3math.cpp diff --git a/indra/llmath/m3math.h b/indra/llmath/m3math.h index 2be5452f8d..2be5452f8d 100644..100755 --- a/indra/llmath/m3math.h +++ b/indra/llmath/m3math.h diff --git a/indra/llmath/m4math.cpp b/indra/llmath/m4math.cpp index 6a1b4143cf..6a1b4143cf 100644..100755 --- a/indra/llmath/m4math.cpp +++ b/indra/llmath/m4math.cpp diff --git a/indra/llmath/m4math.h b/indra/llmath/m4math.h index a7dce10397..a7dce10397 100644..100755 --- a/indra/llmath/m4math.h +++ 
b/indra/llmath/m4math.h diff --git a/indra/llmath/raytrace.cpp b/indra/llmath/raytrace.cpp index f38fe49bcb..f38fe49bcb 100644..100755 --- a/indra/llmath/raytrace.cpp +++ b/indra/llmath/raytrace.cpp diff --git a/indra/llmath/raytrace.h b/indra/llmath/raytrace.h index 2d32af0c86..2d32af0c86 100644..100755 --- a/indra/llmath/raytrace.h +++ b/indra/llmath/raytrace.h diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp new file mode 100755 index 0000000000..5ee3c45502 --- /dev/null +++ b/indra/llmath/tests/alignment_test.cpp @@ -0,0 +1,141 @@ +/** + * @file alignment_test.cpp + * @author Vir + * @date 2011-12 + * @brief Test cases for aligned memory allocation. + * + * $LicenseInfo:firstyear=2011&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +// Tests related to allocating objects with alignment constraints, particularly for SSE support. 
+ +#include "linden_common.h" +#include "../test/lltut.h" +#include "../llmath.h" +#include "../llsimdmath.h" +#include "../llvector4a.h" + +namespace tut +{ + +#define is_aligned(ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr))%(alignment)==0) +#define is_aligned_relative(ptr,base_ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr)-reinterpret_cast<uintptr_t>(base_ptr))%(alignment)==0) + +struct alignment_test {}; + +typedef test_group<alignment_test> alignment_test_t; +typedef alignment_test_t::object alignment_test_object_t; +tut::alignment_test_t tut_alignment_test("LLAlignment"); + +LL_ALIGN_PREFIX(16) +class MyVector4a +{ +public: + void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void *p) + { + ll_aligned_free_16(p); + } + + void* operator new[](size_t count) + { // try to allocate count bytes for an array + return ll_aligned_malloc_16(count); + } + + void operator delete[](void *p) + { + ll_aligned_free_16(p); + } + + LLQuad mQ; +} LL_ALIGN_POSTFIX(16); + + +// Verify that aligned allocators perform as advertised. +template<> template<> +void alignment_test_object_t::test<1>() +{ +# ifdef LL_DEBUG +// skip("This test fails on Windows when compiled in debug mode."); +# endif + + const int num_tests = 7; + void *align_ptr; + for (int i=0; i<num_tests; i++) + { + align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a)); + ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16)); + + align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a), sizeof(MyVector4a)); + ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16)); + + ll_aligned_free_16(align_ptr); + + align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a)); + ensure("ll_aligned_malloc_32 failed", is_aligned(align_ptr,32)); + ll_aligned_free_32(align_ptr); + } +} + +// In-place allocation of objects and arrays. 
+template<> template<> +void alignment_test_object_t::test<2>() +{ + MyVector4a vec1; + ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16)); + + MyVector4a veca[12]; + ensure("LLAlignment veca unaligned", is_aligned(veca,16)); +} + +// Heap allocation of objects and arrays. +template<> template<> +void alignment_test_object_t::test<3>() +{ +# ifdef LL_DEBUG +// skip("This test fails on Windows when compiled in debug mode."); +# endif + + const int ARR_SIZE = 7; + for(int i=0; i<ARR_SIZE; i++) + { + MyVector4a *vecp = new MyVector4a; + ensure("LLAlignment vecp unaligned", is_aligned(vecp,16)); + delete vecp; + } + + MyVector4a *veca = new MyVector4a[ARR_SIZE]; + //std::cout << "veca base is " << (S32) veca << std::endl; + ensure("LLAligment veca base", is_aligned(veca,16)); + for(int i=0; i<ARR_SIZE; i++) + { + std::cout << "veca[" << i << "]" << std::endl; + ensure("LLAlignment veca member unaligned", is_aligned(&veca[i],16)); + } + delete [] veca; +} + +} diff --git a/indra/llmath/tests/llbbox_test.cpp b/indra/llmath/tests/llbbox_test.cpp index fd0dbb58fc..fd0dbb58fc 100644..100755 --- a/indra/llmath/tests/llbbox_test.cpp +++ b/indra/llmath/tests/llbbox_test.cpp diff --git a/indra/llmath/tests/llbboxlocal_test.cpp b/indra/llmath/tests/llbboxlocal_test.cpp index f31e4126c4..f31e4126c4 100644..100755 --- a/indra/llmath/tests/llbboxlocal_test.cpp +++ b/indra/llmath/tests/llbboxlocal_test.cpp diff --git a/indra/llmath/tests/llmodularmath_test.cpp b/indra/llmath/tests/llmodularmath_test.cpp index 063d3ef79f..063d3ef79f 100644..100755 --- a/indra/llmath/tests/llmodularmath_test.cpp +++ b/indra/llmath/tests/llmodularmath_test.cpp diff --git a/indra/llmath/tests/llquaternion_test.cpp b/indra/llmath/tests/llquaternion_test.cpp index e69010b2d6..e69010b2d6 100644..100755 --- a/indra/llmath/tests/llquaternion_test.cpp +++ b/indra/llmath/tests/llquaternion_test.cpp diff --git a/indra/llmath/tests/llrect_test.cpp b/indra/llmath/tests/llrect_test.cpp index 
d740173e69..d740173e69 100644..100755 --- a/indra/llmath/tests/llrect_test.cpp +++ b/indra/llmath/tests/llrect_test.cpp diff --git a/indra/llmath/tests/m3math_test.cpp b/indra/llmath/tests/m3math_test.cpp index 1ca2b005d9..1ca2b005d9 100644..100755 --- a/indra/llmath/tests/m3math_test.cpp +++ b/indra/llmath/tests/m3math_test.cpp diff --git a/indra/llmath/tests/mathmisc_test.cpp b/indra/llmath/tests/mathmisc_test.cpp index 91a2e6c009..91a2e6c009 100644..100755 --- a/indra/llmath/tests/mathmisc_test.cpp +++ b/indra/llmath/tests/mathmisc_test.cpp diff --git a/indra/llmath/tests/v2math_test.cpp b/indra/llmath/tests/v2math_test.cpp index 4d6a2eca93..4d6a2eca93 100644..100755 --- a/indra/llmath/tests/v2math_test.cpp +++ b/indra/llmath/tests/v2math_test.cpp diff --git a/indra/llmath/tests/v3color_test.cpp b/indra/llmath/tests/v3color_test.cpp index 29d1c483ab..29d1c483ab 100644..100755 --- a/indra/llmath/tests/v3color_test.cpp +++ b/indra/llmath/tests/v3color_test.cpp diff --git a/indra/llmath/tests/v3dmath_test.cpp b/indra/llmath/tests/v3dmath_test.cpp index 20b26faa12..20b26faa12 100644..100755 --- a/indra/llmath/tests/v3dmath_test.cpp +++ b/indra/llmath/tests/v3dmath_test.cpp diff --git a/indra/llmath/tests/v3math_test.cpp b/indra/llmath/tests/v3math_test.cpp index e4ae1c10ef..e4ae1c10ef 100644..100755 --- a/indra/llmath/tests/v3math_test.cpp +++ b/indra/llmath/tests/v3math_test.cpp diff --git a/indra/llmath/tests/v4color_test.cpp b/indra/llmath/tests/v4color_test.cpp index d7eec3c87f..d7eec3c87f 100644..100755 --- a/indra/llmath/tests/v4color_test.cpp +++ b/indra/llmath/tests/v4color_test.cpp diff --git a/indra/llmath/tests/v4coloru_test.cpp b/indra/llmath/tests/v4coloru_test.cpp index 128f6f3564..128f6f3564 100644..100755 --- a/indra/llmath/tests/v4coloru_test.cpp +++ b/indra/llmath/tests/v4coloru_test.cpp diff --git a/indra/llmath/tests/v4math_test.cpp b/indra/llmath/tests/v4math_test.cpp index 191ac864df..191ac864df 100644..100755 --- 
a/indra/llmath/tests/v4math_test.cpp +++ b/indra/llmath/tests/v4math_test.cpp diff --git a/indra/llmath/tests/xform_test.cpp b/indra/llmath/tests/xform_test.cpp index 49870eef3c..49870eef3c 100644..100755 --- a/indra/llmath/tests/xform_test.cpp +++ b/indra/llmath/tests/xform_test.cpp diff --git a/indra/llmath/v2math.cpp b/indra/llmath/v2math.cpp index a0cd642853..a0cd642853 100644..100755 --- a/indra/llmath/v2math.cpp +++ b/indra/llmath/v2math.cpp diff --git a/indra/llmath/v2math.h b/indra/llmath/v2math.h index 8d5db96f5e..8d5db96f5e 100644..100755 --- a/indra/llmath/v2math.h +++ b/indra/llmath/v2math.h diff --git a/indra/llmath/v3color.cpp b/indra/llmath/v3color.cpp index d38f48b11e..d38f48b11e 100644..100755 --- a/indra/llmath/v3color.cpp +++ b/indra/llmath/v3color.cpp diff --git a/indra/llmath/v3color.h b/indra/llmath/v3color.h index 56cb2ae73e..daf3a6857b 100644..100755 --- a/indra/llmath/v3color.h +++ b/indra/llmath/v3color.h @@ -33,6 +33,7 @@ class LLVector4; #include "llerror.h" #include "llmath.h" #include "llsd.h" +#include <string.h> // LLColor3 = |r g b| diff --git a/indra/llmath/v3dmath.cpp b/indra/llmath/v3dmath.cpp index a50cb3c6ca..a50cb3c6ca 100644..100755 --- a/indra/llmath/v3dmath.cpp +++ b/indra/llmath/v3dmath.cpp diff --git a/indra/llmath/v3dmath.h b/indra/llmath/v3dmath.h index 578dcdc8ea..578dcdc8ea 100644..100755 --- a/indra/llmath/v3dmath.h +++ b/indra/llmath/v3dmath.h diff --git a/indra/llmath/v3math.cpp b/indra/llmath/v3math.cpp index e7107dee16..e7107dee16 100644..100755 --- a/indra/llmath/v3math.cpp +++ b/indra/llmath/v3math.cpp diff --git a/indra/llmath/v3math.h b/indra/llmath/v3math.h index 0432aeba4c..0432aeba4c 100644..100755 --- a/indra/llmath/v3math.h +++ b/indra/llmath/v3math.h diff --git a/indra/llmath/v4color.cpp b/indra/llmath/v4color.cpp index 81ac62be56..81ac62be56 100644..100755 --- a/indra/llmath/v4color.cpp +++ b/indra/llmath/v4color.cpp diff --git a/indra/llmath/v4color.h b/indra/llmath/v4color.h index 
b047f86e6e..8c8c315808 100644..100755 --- a/indra/llmath/v4color.h +++ b/indra/llmath/v4color.h @@ -50,7 +50,7 @@ class LLColor4 LLColor4(F32 r, F32 g, F32 b); // Initializes LLColor4 to (r, g, b, 1) LLColor4(F32 r, F32 g, F32 b, F32 a); // Initializes LLColor4 to (r. g, b, a) LLColor4(U32 clr); // Initializes LLColor4 to (r=clr>>24, etc)) - LLColor4(const F32 *vec); // Initializes LLColor4 to (vec[0]. vec[1], vec[2], 1) + LLColor4(const F32 *vec); // Initializes LLColor4 to (vec[0], vec[1], vec[2], vec[3]) LLColor4(const LLColor3 &vec, F32 a = 1.f); // Initializes LLColor4 to (vec, a) explicit LLColor4(const LLSD& sd); explicit LLColor4(const LLColor4U& color4u); // "explicit" to avoid automatic conversion diff --git a/indra/llmath/v4coloru.cpp b/indra/llmath/v4coloru.cpp index f1a2518cf3..f1a2518cf3 100644..100755 --- a/indra/llmath/v4coloru.cpp +++ b/indra/llmath/v4coloru.cpp diff --git a/indra/llmath/v4coloru.h b/indra/llmath/v4coloru.h index 12da7e2dd7..12da7e2dd7 100644..100755 --- a/indra/llmath/v4coloru.h +++ b/indra/llmath/v4coloru.h diff --git a/indra/llmath/v4math.cpp b/indra/llmath/v4math.cpp index 2782cf2966..2782cf2966 100644..100755 --- a/indra/llmath/v4math.cpp +++ b/indra/llmath/v4math.cpp diff --git a/indra/llmath/v4math.h b/indra/llmath/v4math.h index 623c8b2003..623c8b2003 100644..100755 --- a/indra/llmath/v4math.h +++ b/indra/llmath/v4math.h diff --git a/indra/llmath/xform.cpp b/indra/llmath/xform.cpp index b75aec6a27..b75aec6a27 100644..100755 --- a/indra/llmath/xform.cpp +++ b/indra/llmath/xform.cpp diff --git a/indra/llmath/xform.h b/indra/llmath/xform.h index 1b50749b3e..1b50749b3e 100644..100755 --- a/indra/llmath/xform.h +++ b/indra/llmath/xform.h |