Diffstat (limited to 'indra/llmath')
-rw-r--r--   indra/llmath/CMakeLists.txt          |  7
-rw-r--r--   indra/llmath/llmath.h                |  9
-rw-r--r--   indra/llmath/llmatrix4a.h            | 39
-rw-r--r--   indra/llmath/lloctree.cpp            | 29
-rw-r--r--   indra/llmath/llsimdmath.h            |  2
-rw-r--r--   indra/llmath/llvolume.cpp            | 42
-rw-r--r--   indra/llmath/llvolume.h              |  4
-rw-r--r--   indra/llmath/tests/v4math_test.cpp   |  3
-rw-r--r--   indra/llmath/v4coloru.h              | 32
9 files changed, 138 insertions, 29 deletions
diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt
index 0614fd92ef..4c8bcdac91 100644
--- a/indra/llmath/CMakeLists.txt
+++ b/indra/llmath/CMakeLists.txt
@@ -4,6 +4,7 @@ project(llmath)
include(00-Common)
include(LLCommon)
+include(Boost)
include_directories(
${LLCOMMON_INCLUDE_DIRS}
@@ -20,6 +21,7 @@ set(llmath_SOURCE_FILES
llline.cpp
llmatrix3a.cpp
llmodularmath.cpp
+ lloctree.cpp
llperlin.cpp
llquaternion.cpp
llrect.cpp
@@ -117,6 +119,11 @@ if (LL_TESTS)
v4color.cpp
v4coloru.cpp
)
+ set_source_files_properties(
+ ${llmath_TEST_SOURCE_FILES}
+ PROPERTIES
+ LL_TEST_ADDITIONAL_LIBRARIES "${BOOST_THREAD_LIBRARY}"
+ )
LL_ADD_PROJECT_UNIT_TESTS(llmath "${llmath_TEST_SOURCE_FILES}")
# INTEGRATION TESTS
diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h
index 93b9f22b25..e508c9a199 100644
--- a/indra/llmath/llmath.h
+++ b/indra/llmath/llmath.h
@@ -153,7 +153,7 @@ inline F64 llabs(const F64 a)
inline S32 lltrunc( F32 f )
{
-#if LL_WINDOWS && !defined( __INTEL_COMPILER )
+#if LL_WINDOWS && !defined( __INTEL_COMPILER ) && (ADDRESS_SIZE == 32)
// Avoids changing the floating point control word.
// Add or subtract 0.5 - epsilon and then round
const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
@@ -179,7 +179,7 @@ inline S32 lltrunc( F64 f )
inline S32 llfloor( F32 f )
{
-#if LL_WINDOWS && !defined( __INTEL_COMPILER )
+#if LL_WINDOWS && !defined( __INTEL_COMPILER ) && (ADDRESS_SIZE == 32)
// Avoids changing the floating point control word.
// Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
// Add -(0.5 - epsilon) and then round
@@ -254,6 +254,11 @@ inline int round_int(double x)
}
#endif // BOGUS_ROUND
+inline F64 ll_round(const F64 val)
+{
+ return F64(floor(val + 0.5f));
+}
+
inline F32 ll_round( F32 val, F32 nearest )
{
return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index d141298f69..216334752a 100644
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -121,7 +121,7 @@ public:
res.add(z);
}
- inline void affineTransform(const LLVector4a& v, LLVector4a& res)
+ inline void affineTransformSSE(const LLVector4a& v, LLVector4a& res)
{
LLVector4a x,y,z;
@@ -137,6 +137,43 @@ public:
z.add(mMatrix[3]);
res.setAdd(x,z);
}
+
+ inline void affineTransformNonSSE(const LLVector4a& v, LLVector4a& res)
+ {
+ F32 x = v[0] * mMatrix[0][0] + v[1] * mMatrix[1][0] + v[2] * mMatrix[2][0] + mMatrix[3][0];
+ F32 y = v[0] * mMatrix[0][1] + v[1] * mMatrix[1][1] + v[2] * mMatrix[2][1] + mMatrix[3][1];
+ F32 z = v[0] * mMatrix[0][2] + v[1] * mMatrix[1][2] + v[2] * mMatrix[2][2] + mMatrix[3][2];
+ F32 w = 1.0f;
+ res.set(x,y,z,w);
+ }
+
+ inline void affineTransform(const LLVector4a& v, LLVector4a& res)
+ {
+ affineTransformSSE(v,res);
+ }
};
+inline LLVector4a rowMul(const LLVector4a &row, const LLMatrix4a &mat)
+{
+ LLVector4a result;
+ result = _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(0, 0, 0, 0)), mat.mMatrix[0]);
+ result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(1, 1, 1, 1)), mat.mMatrix[1]));
+ result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(2, 2, 2, 2)), mat.mMatrix[2]));
+ result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(3, 3, 3, 3)), mat.mMatrix[3]));
+ return result;
+}
+
+inline void matMul(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
+{
+ LLVector4a row0 = rowMul(a.mMatrix[0], b);
+ LLVector4a row1 = rowMul(a.mMatrix[1], b);
+ LLVector4a row2 = rowMul(a.mMatrix[2], b);
+ LLVector4a row3 = rowMul(a.mMatrix[3], b);
+
+ res.mMatrix[0] = row0;
+ res.mMatrix[1] = row1;
+ res.mMatrix[2] = row2;
+ res.mMatrix[3] = row3;
+}
+
#endif
diff --git a/indra/llmath/lloctree.cpp b/indra/llmath/lloctree.cpp
new file mode 100644
index 0000000000..3fcb3a27d7
--- /dev/null
+++ b/indra/llmath/lloctree.cpp
@@ -0,0 +1,29 @@
+/**
+ * @file lloctree.cpp
+ *
+ * $LicenseInfo:firstyear=2005&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+#include "stdtypes.h"
+
+U32 gOctreeMaxCapacity;
+F32 gOctreeMinSize;
+
diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h
index cebd2ace7d..54a275633f 100644
--- a/indra/llmath/llsimdmath.h
+++ b/indra/llmath/llsimdmath.h
@@ -31,7 +31,7 @@
#error "Please include llmath.h before this file."
#endif
-#if ( ( LL_DARWIN || LL_LINUX ) && !(__SSE2__) ) || ( LL_WINDOWS && ( _M_IX86_FP < 2 ) )
+#if ( ( LL_DARWIN || LL_LINUX ) && !(__SSE2__) ) || ( LL_WINDOWS && ( _M_IX86_FP < 2 && ADDRESS_SIZE == 32 ) )
#error SSE2 not enabled. LLVector4a and related class will not compile.
#endif
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index d932eb53a0..f63a721c35 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -89,7 +89,7 @@ const F32 SKEW_MAX = 0.95f;
const F32 SCULPT_MIN_AREA = 0.002f;
const S32 SCULPT_MIN_AREA_DETAIL = 1;
-extern BOOL gDebugGL;
+BOOL gDebugGL = FALSE;
BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
{
@@ -2143,19 +2143,22 @@ BOOL LLVolume::generate()
F32 profile_detail = mDetail;
F32 path_detail = mDetail;
-
- U8 path_type = mParams.getPathParams().getCurveType();
- U8 profile_type = mParams.getProfileParams().getCurveType();
-
- if (path_type == LL_PCODE_PATH_LINE && profile_type == LL_PCODE_PROFILE_CIRCLE)
- { //cylinders don't care about Z-Axis
- mLODScaleBias.setVec(0.6f, 0.6f, 0.0f);
- }
- else if (path_type == LL_PCODE_PATH_CIRCLE)
- {
- mLODScaleBias.setVec(0.6f, 0.6f, 0.6f);
+
+ if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) != LL_SCULPT_TYPE_MESH)
+ {
+ U8 path_type = mParams.getPathParams().getCurveType();
+ U8 profile_type = mParams.getProfileParams().getCurveType();
+ if (path_type == LL_PCODE_PATH_LINE && profile_type == LL_PCODE_PROFILE_CIRCLE)
+ {
+ //cylinders don't care about Z-Axis
+ mLODScaleBias.setVec(0.6f, 0.6f, 0.0f);
+ }
+ else if (path_type == LL_PCODE_PATH_CIRCLE)
+ {
+ mLODScaleBias.setVec(0.6f, 0.6f, 0.6f);
+ }
}
-
+
BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split);
BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split);
@@ -2544,7 +2547,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
U16 influence = weights[idx++];
influence |= ((U16) weights[idx++] << 8);
- F32 w = llclamp((F32) influence / 65535.f, 0.f, 0.99999f);
+ F32 w = llclamp((F32) influence / 65535.f, 0.001f, 0.999f);
wght.mV[cur_influence] = w;
joints[cur_influence] = joint;
cur_influence++;
@@ -2561,11 +2564,15 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
F32 wsum = wght.mV[VX] + wght.mV[VY] + wght.mV[VZ] + wght.mV[VW];
if (wsum <= 0.f)
{
- wght = LLVector4(0.99999f,0.f,0.f,0.f);
+ wght = LLVector4(0.999f,0.f,0.f,0.f);
}
for (U32 k=0; k<4; k++)
{
- joints_with_weights[k] = (F32) joints[k] + wght[k];
+ F32 f_combined = (F32) joints[k] + wght[k];
+ joints_with_weights[k] = f_combined;
+ // Any weights we added above should wind up non-zero and applied to a specific bone.
+ // A failure here would indicate a floating point precision error in the math.
+ llassert((k >= cur_influence) || (f_combined - S32(f_combined) > 0.0f));
}
face.mWeights[cur_vertex].loadua(joints_with_weights.mV);
@@ -4568,6 +4575,7 @@ LLVolumeFace::LLVolumeFace() :
mTexCoords(NULL),
mIndices(NULL),
mWeights(NULL),
+ mWeightsScrubbed(FALSE),
mOctree(NULL),
mOptimized(FALSE)
{
@@ -4593,6 +4601,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
mTexCoords(NULL),
mIndices(NULL),
mWeights(NULL),
+ mWeightsScrubbed(FALSE),
mOctree(NULL)
{
mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3);
@@ -4664,6 +4673,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
ll_aligned_free_16(mWeights);
mWeights = NULL;
}
+ mWeightsScrubbed = src.mWeightsScrubbed;
}
if (mNumIndices)
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 1da2d0c6b1..bf81c978a0 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -199,6 +199,8 @@ const U8 LL_SCULPT_FLAG_MASK = LL_SCULPT_FLAG_INVERT | LL_SCULPT_FLAG_MIRROR;
const S32 LL_SCULPT_MESH_MAX_FACES = 8;
+extern BOOL gDebugGL;
+
class LLProfileParams
{
public:
@@ -953,6 +955,8 @@ public:
// mWeights.size() should be empty or match mVertices.size()
LLVector4a* mWeights;
+ mutable BOOL mWeightsScrubbed;
+
LLOctreeNode<LLVolumeTriangle>* mOctree;
//whether or not face has been cache optimized
diff --git a/indra/llmath/tests/v4math_test.cpp b/indra/llmath/tests/v4math_test.cpp
index 191ac864df..9779dfded3 100644
--- a/indra/llmath/tests/v4math_test.cpp
+++ b/indra/llmath/tests/v4math_test.cpp
@@ -355,7 +355,8 @@ namespace tut
val3 = z1 + (z2 - z1)* val;
val4 = w1 + (w2 - w1)* val;
LLVector4 vec4b = lerp(vec4,vec4a,val);
- ensure("lerp failed", ((val1 ==vec4b.mV[VX])&& (val2 ==vec4b.mV[VY]) && (val3 ==vec4b.mV[VZ])&& (val4 ==vec4b.mV[VW])));
+ LLVector4 check(val1, val2, val3, val4);
+ ensure_equals("lerp failed", check, vec4b);
}
template<> template<>
diff --git a/indra/llmath/v4coloru.h b/indra/llmath/v4coloru.h
index fddad34978..704ce852d9 100644
--- a/indra/llmath/v4coloru.h
+++ b/indra/llmath/v4coloru.h
@@ -47,14 +47,7 @@
class LLColor4U
{
public:
- union
- {
- U8 mV[LENGTHOFCOLOR4U];
- U32 mAll;
- LLColor4* mSources;
- LLColor4U* mSourcesU;
- };
-
+ U8 mV[LENGTHOFCOLOR4U];
LLColor4U(); // Initializes LLColor4U to (0, 0, 0, 1)
LLColor4U(U8 r, U8 g, U8 b); // Initializes LLColor4U to (r, g, b, 1)
@@ -132,6 +125,9 @@ public:
return LLColor4(*this);
}
+ U32 asRGBA() const;
+ void fromRGBA( U32 aVal );
+
static LLColor4U white;
static LLColor4U black;
static LLColor4U red;
@@ -565,6 +561,26 @@ void LLColor4U::setVecScaleClamp(const LLColor3& color)
mV[3] = 255;
}
+inline U32 LLColor4U::asRGBA() const
+{
+ // Little endian: values are swapped in memory. The original code access the array like a U32, so we need to swap here
+
+ return (mV[3] << 24) | (mV[2] << 16) | (mV[1] << 8) | mV[0];
+}
+
+inline void LLColor4U::fromRGBA( U32 aVal )
+{
+ // Little endian: values are swapped in memory. The original code access the array like a U32, so we need to swap here
+
+ mV[ 0 ] = aVal & 0xFF;
+ aVal >>= 8;
+ mV[ 1 ] = aVal & 0xFF;
+ aVal >>= 8;
+ mV[ 2 ] = aVal & 0xFF;
+ aVal >>= 8;
+ mV[ 3 ] = aVal & 0xFF;
+}
+
#endif
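
The rowMul/matMul additions to llmatrix4a.h above build a 4x4 product one row at a time: each component of a row of `a` is broadcast across a register, multiplied against the matching row of `b`, and the four partial products are summed. Below is a minimal standalone sketch of the same technique using raw __m128 registers instead of the viewer's LLMatrix4a/LLVector4a types; the Mat4, row_mul, and mat_mul names are illustrative only, not part of the viewer API.

#include <xmmintrin.h>  // SSE intrinsics

// Illustrative stand-in for a row-major 4x4 matrix of packed floats.
struct Mat4 { __m128 row[4]; };

// Multiply one row vector by a matrix: splat each component of the row,
// scale the matching matrix row, and accumulate the four partial products.
static inline __m128 row_mul(__m128 r, const Mat4& m)
{
    __m128 out = _mm_mul_ps(_mm_shuffle_ps(r, r, _MM_SHUFFLE(0, 0, 0, 0)), m.row[0]);
    out = _mm_add_ps(out, _mm_mul_ps(_mm_shuffle_ps(r, r, _MM_SHUFFLE(1, 1, 1, 1)), m.row[1]));
    out = _mm_add_ps(out, _mm_mul_ps(_mm_shuffle_ps(r, r, _MM_SHUFFLE(2, 2, 2, 2)), m.row[2]));
    out = _mm_add_ps(out, _mm_mul_ps(_mm_shuffle_ps(r, r, _MM_SHUFFLE(3, 3, 3, 3)), m.row[3]));
    return out;
}

// res = a * b, computed row by row as matMul does. All four rows are
// computed before storing so the call stays correct if res aliases a or b.
static inline void mat_mul(const Mat4& a, const Mat4& b, Mat4& res)
{
    __m128 r0 = row_mul(a.row[0], b);
    __m128 r1 = row_mul(a.row[1], b);
    __m128 r2 = row_mul(a.row[2], b);
    __m128 r3 = row_mul(a.row[3], b);
    res.row[0] = r0; res.row[1] = r1; res.row[2] = r2; res.row[3] = r3;
}

int main()
{
    Mat4 identity = {{ _mm_setr_ps(1,0,0,0), _mm_setr_ps(0,1,0,0),
                       _mm_setr_ps(0,0,1,0), _mm_setr_ps(0,0,0,1) }};
    Mat4 out;
    mat_mul(identity, identity, out);   // identity * identity == identity
    return 0;
}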
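
In the unpackVolumeFaces change above, each 16-bit influence is converted to a float weight, clamped to [0.001, 0.999], and then stored as `(F32) joints[k] + wght[k]`, so the joint index lives in the float's integer part and the weight in its fractional part; the new llassert checks that the fraction stays non-zero. A standalone sketch of that packing scheme under the same clamping assumption (pack_influence/unpack_influence are illustrative helpers, not viewer functions):

#include <cassert>
#include <cmath>
#include <cstdint>

// Pack a joint index (integer part) and a weight (fractional part) into one float.
// Clamping the weight to [0.001, 0.999] keeps the fraction non-zero and below 1,
// so the joint index can always be recovered by truncation.
static float pack_influence(uint16_t raw_weight, int joint)
{
    float w = raw_weight / 65535.0f;
    if (w < 0.001f) w = 0.001f;
    if (w > 0.999f) w = 0.999f;
    return static_cast<float>(joint) + w;
}

static void unpack_influence(float packed, int& joint, float& weight)
{
    joint = static_cast<int>(packed);        // integer part: joint index
    weight = packed - std::floor(packed);    // fractional part: weight
}

int main()
{
    float packed = pack_influence(0, 17);    // even a raw weight of 0 yields a usable fraction
    int joint; float weight;
    unpack_influence(packed, joint, weight);
    assert(joint == 17 && weight > 0.0f);    // same invariant the new llassert enforces
    return 0;
}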
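
The new asRGBA/fromRGBA methods in v4coloru.h replace the removed union's U32 view of the color: on a little-endian host, reading mV through a U32 put mV[0] (red) in the least-significant byte, which is the byte order the shifts reproduce. A standalone round-trip sketch of the same packing, using plain uint8_t/uint32_t rather than the LLColor4U API (pack_rgba/unpack_rgba are illustrative names):

#include <cassert>
#include <cstdint>

// Pack R,G,B,A bytes into a 32-bit value with R in the least-significant byte,
// matching what reading the old union's U32 member produced on little-endian hosts.
static uint32_t pack_rgba(const uint8_t v[4])
{
    return (uint32_t(v[3]) << 24) | (uint32_t(v[2]) << 16) | (uint32_t(v[1]) << 8) | v[0];
}

static void unpack_rgba(uint32_t packed, uint8_t v[4])
{
    for (int i = 0; i < 4; ++i)
    {
        v[i] = packed & 0xFF;   // peel one byte at a time, low byte first
        packed >>= 8;
    }
}

int main()
{
    const uint8_t color[4] = { 0x12, 0x34, 0x56, 0xFF };    // R, G, B, A
    uint8_t out[4];
    unpack_rgba(pack_rgba(color), out);
    for (int i = 0; i < 4; ++i) assert(out[i] == color[i]); // lossless round trip
    return 0;
}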