Diffstat (limited to 'indra/llmath')
-rw-r--r--  indra/llmath/CMakeLists.txt         |  7
-rw-r--r--  indra/llmath/llmath.h               |  9
-rw-r--r--  indra/llmath/llmatrix4a.h           | 39
-rw-r--r--  indra/llmath/lloctree.cpp           | 29
-rw-r--r--  indra/llmath/llsimdmath.h           |  2
-rw-r--r--  indra/llmath/llvolume.cpp           | 42
-rw-r--r--  indra/llmath/llvolume.h             |  4
-rw-r--r--  indra/llmath/tests/v4math_test.cpp  |  3
-rw-r--r--  indra/llmath/v4coloru.h             | 32
9 files changed, 138 insertions(+), 29 deletions(-)
diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt
index 0614fd92ef..4c8bcdac91 100644
--- a/indra/llmath/CMakeLists.txt
+++ b/indra/llmath/CMakeLists.txt
@@ -4,6 +4,7 @@ project(llmath)
include(00-Common)
include(LLCommon)
+include(Boost)
include_directories(
${LLCOMMON_INCLUDE_DIRS}
@@ -20,6 +21,7 @@ set(llmath_SOURCE_FILES
llline.cpp
llmatrix3a.cpp
llmodularmath.cpp
+ lloctree.cpp
llperlin.cpp
llquaternion.cpp
llrect.cpp
@@ -117,6 +119,11 @@ if (LL_TESTS)
v4color.cpp
v4coloru.cpp
)
+ set_source_files_properties(
+ ${llmath_TEST_SOURCE_FILES}
+ PROPERTIES
+ LL_TEST_ADDITIONAL_LIBRARIES "${BOOST_THREAD_LIBRARY}"
+ )
LL_ADD_PROJECT_UNIT_TESTS(llmath "${llmath_TEST_SOURCE_FILES}")
# INTEGRATION TESTS
diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h
index 93b9f22b25..e508c9a199 100644
--- a/indra/llmath/llmath.h
+++ b/indra/llmath/llmath.h
@@ -153,7 +153,7 @@ inline F64 llabs(const F64 a)
inline S32 lltrunc( F32 f )
{
-#if LL_WINDOWS && !defined( __INTEL_COMPILER )
+#if LL_WINDOWS && !defined( __INTEL_COMPILER ) && (ADDRESS_SIZE == 32)
// Avoids changing the floating point control word.
// Add or subtract 0.5 - epsilon and then round
const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
@@ -179,7 +179,7 @@ inline S32 lltrunc( F64 f )
inline S32 llfloor( F32 f )
{
-#if LL_WINDOWS && !defined( __INTEL_COMPILER )
+#if LL_WINDOWS && !defined( __INTEL_COMPILER ) && (ADDRESS_SIZE == 32)
// Avoids changing the floating point control word.
// Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
// Add -(0.5 - epsilon) and then round
@@ -254,6 +254,11 @@ inline int round_int(double x)
}
#endif // BOGUS_ROUND
+inline F64 ll_round(const F64 val)
+{
+ return F64(floor(val + 0.5f));
+}
+
inline F32 ll_round( F32 val, F32 nearest )
{
return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
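As a side note on the new F64 ll_round() overload above: because it is written as floor(val + 0.5), halfway values round toward positive infinity rather than away from zero. A minimal stand-alone sketch (the ll_round_demo name is only illustrative, not from the patch):

    // Illustrative only; mirrors the floor(val + 0.5) behaviour of the new overload.
    #include <cmath>
    #include <cassert>

    inline double ll_round_demo(double val) { return std::floor(val + 0.5); }

    int main()
    {
        assert(ll_round_demo( 2.4) ==  2.0);
        assert(ll_round_demo( 2.5) ==  3.0);   // halves round up
        assert(ll_round_demo(-2.5) == -2.0);   // not "round half away from zero"
        return 0;
    }
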
diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index d141298f69..216334752a 100644
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -121,7 +121,7 @@ public:
res.add(z);
}
- inline void affineTransform(const LLVector4a& v, LLVector4a& res)
+ inline void affineTransformSSE(const LLVector4a& v, LLVector4a& res)
{
LLVector4a x,y,z;
@@ -137,6 +137,43 @@ public:
z.add(mMatrix[3]);
res.setAdd(x,z);
}
+
+ inline void affineTransformNonSSE(const LLVector4a& v, LLVector4a& res)
+ {
+ F32 x = v[0] * mMatrix[0][0] + v[1] * mMatrix[1][0] + v[2] * mMatrix[2][0] + mMatrix[3][0];
+ F32 y = v[0] * mMatrix[0][1] + v[1] * mMatrix[1][1] + v[2] * mMatrix[2][1] + mMatrix[3][1];
+ F32 z = v[0] * mMatrix[0][2] + v[1] * mMatrix[1][2] + v[2] * mMatrix[2][2] + mMatrix[3][2];
+ F32 w = 1.0f;
+ res.set(x,y,z,w);
+ }
+
+ inline void affineTransform(const LLVector4a& v, LLVector4a& res)
+ {
+ affineTransformSSE(v,res);
+ }
};
+inline LLVector4a rowMul(const LLVector4a &row, const LLMatrix4a &mat)
+{
+ LLVector4a result;
+ result = _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(0, 0, 0, 0)), mat.mMatrix[0]);
+ result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(1, 1, 1, 1)), mat.mMatrix[1]));
+ result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(2, 2, 2, 2)), mat.mMatrix[2]));
+ result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(3, 3, 3, 3)), mat.mMatrix[3]));
+ return result;
+}
+
+inline void matMul(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
+{
+ LLVector4a row0 = rowMul(a.mMatrix[0], b);
+ LLVector4a row1 = rowMul(a.mMatrix[1], b);
+ LLVector4a row2 = rowMul(a.mMatrix[2], b);
+ LLVector4a row3 = rowMul(a.mMatrix[3], b);
+
+ res.mMatrix[0] = row0;
+ res.mMatrix[1] = row1;
+ res.mMatrix[2] = row2;
+ res.mMatrix[3] = row3;
+}
+
#endif
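For context on the new helpers above: rowMul() broadcasts each component of a row of A and accumulates it against the corresponding row of B, so matMul() computes res = A * B for row-major matrices. A scalar sketch of the same computation (matMulScalar is a hypothetical name, shown only to make the indexing explicit):

    // Scalar equivalent of matMul(): res[i][j] = sum over k of a[i][k] * b[k][j].
    void matMulScalar(const float a[4][4], const float b[4][4], float res[4][4])
    {
        for (int i = 0; i < 4; ++i)
        {
            for (int j = 0; j < 4; ++j)
            {
                float sum = 0.f;
                for (int k = 0; k < 4; ++k)
                {
                    sum += a[i][k] * b[k][j];  // rowMul broadcasts a[i][k] across row k of b
                }
                res[i][j] = sum;
            }
        }
    }
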
diff --git a/indra/llmath/lloctree.cpp b/indra/llmath/lloctree.cpp
new file mode 100644
index 0000000000..3fcb3a27d7
--- /dev/null
+++ b/indra/llmath/lloctree.cpp
@@ -0,0 +1,29 @@
+/**
+ * @file lloctree.cpp
+ *
+ * $LicenseInfo:firstyear=2005&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+#include "stdtypes.h"
+
+U32 gOctreeMaxCapacity;
+F32 gOctreeMinSize;
+
diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h
index cebd2ace7d..54a275633f 100644
--- a/indra/llmath/llsimdmath.h
+++ b/indra/llmath/llsimdmath.h
@@ -31,7 +31,7 @@
#error "Please include llmath.h before this file."
#endif
-#if ( ( LL_DARWIN || LL_LINUX ) && !(__SSE2__) ) || ( LL_WINDOWS && ( _M_IX86_FP < 2 ) )
+#if ( ( LL_DARWIN || LL_LINUX ) && !(__SSE2__) ) || ( LL_WINDOWS && ( _M_IX86_FP < 2 && ADDRESS_SIZE == 32 ) )
#error SSE2 not enabled. LLVector4a and related class will not compile.
#endif
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index d932eb53a0..f63a721c35 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -89,7 +89,7 @@ const F32 SKEW_MAX = 0.95f;
const F32 SCULPT_MIN_AREA = 0.002f;
const S32 SCULPT_MIN_AREA_DETAIL = 1;
-extern BOOL gDebugGL;
+BOOL gDebugGL = FALSE;
BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
{
@@ -2143,19 +2143,22 @@ BOOL LLVolume::generate()
F32 profile_detail = mDetail;
F32 path_detail = mDetail;
-
- U8 path_type = mParams.getPathParams().getCurveType();
- U8 profile_type = mParams.getProfileParams().getCurveType();
-
- if (path_type == LL_PCODE_PATH_LINE && profile_type == LL_PCODE_PROFILE_CIRCLE)
- { //cylinders don't care about Z-Axis
- mLODScaleBias.setVec(0.6f, 0.6f, 0.0f);
- }
- else if (path_type == LL_PCODE_PATH_CIRCLE)
- {
- mLODScaleBias.setVec(0.6f, 0.6f, 0.6f);
+
+ if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) != LL_SCULPT_TYPE_MESH)
+ {
+ U8 path_type = mParams.getPathParams().getCurveType();
+ U8 profile_type = mParams.getProfileParams().getCurveType();
+ if (path_type == LL_PCODE_PATH_LINE && profile_type == LL_PCODE_PROFILE_CIRCLE)
+ {
+ //cylinders don't care about Z-Axis
+ mLODScaleBias.setVec(0.6f, 0.6f, 0.0f);
+ }
+ else if (path_type == LL_PCODE_PATH_CIRCLE)
+ {
+ mLODScaleBias.setVec(0.6f, 0.6f, 0.6f);
+ }
}
-
+
BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split);
BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split);
@@ -2544,7 +2547,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
U16 influence = weights[idx++];
influence |= ((U16) weights[idx++] << 8);
- F32 w = llclamp((F32) influence / 65535.f, 0.f, 0.99999f);
+ F32 w = llclamp((F32) influence / 65535.f, 0.001f, 0.999f);
wght.mV[cur_influence] = w;
joints[cur_influence] = joint;
cur_influence++;
@@ -2561,11 +2564,15 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
F32 wsum = wght.mV[VX] + wght.mV[VY] + wght.mV[VZ] + wght.mV[VW];
if (wsum <= 0.f)
{
- wght = LLVector4(0.99999f,0.f,0.f,0.f);
+ wght = LLVector4(0.999f,0.f,0.f,0.f);
}
for (U32 k=0; k<4; k++)
{
- joints_with_weights[k] = (F32) joints[k] + wght[k];
+ F32 f_combined = (F32) joints[k] + wght[k];
+ joints_with_weights[k] = f_combined;
+ // Any weights we added above should wind up non-zero and applied to a specific bone.
+ // A failure here would indicate a floating point precision error in the math.
+ llassert((k >= cur_influence) || (f_combined - S32(f_combined) > 0.0f));
}
face.mWeights[cur_vertex].loadua(joints_with_weights.mV);
@@ -4568,6 +4575,7 @@ LLVolumeFace::LLVolumeFace() :
mTexCoords(NULL),
mIndices(NULL),
mWeights(NULL),
+ mWeightsScrubbed(FALSE),
mOctree(NULL),
mOptimized(FALSE)
{
@@ -4593,6 +4601,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
mTexCoords(NULL),
mIndices(NULL),
mWeights(NULL),
+ mWeightsScrubbed(FALSE),
mOctree(NULL)
{
mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3);
@@ -4664,6 +4673,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
ll_aligned_free_16(mWeights);
mWeights = NULL;
}
+ mWeightsScrubbed = src.mWeightsScrubbed;
}
if (mNumIndices)
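For context on the weight hunks above: each entry of joints_with_weights packs the joint index into the integer part of an F32 and the clamped weight into the fractional part, which is why the weights are clamped away from 0.0 and 1.0 and why the llassert checks that the fractional part stays non-zero. A minimal sketch of that convention (packInfluence/unpackInfluence are hypothetical helpers, not from the patch):

    #include <cmath>

    // Pack one skin influence as "joint index + weight fraction"; weight expected in (0, 1).
    inline float packInfluence(int joint, float weight)
    {
        return (float)joint + weight;              // e.g. joint 7, weight 0.25 -> 7.25f
    }

    inline void unpackInfluence(float packed, int& joint, float& weight)
    {
        joint  = (int)std::floor(packed);          // integer part: joint index
        weight = packed - (float)joint;            // fractional part: weight
    }
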
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 1da2d0c6b1..bf81c978a0 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -199,6 +199,8 @@ const U8 LL_SCULPT_FLAG_MASK = LL_SCULPT_FLAG_INVERT | LL_SCULPT_FLAG_MIRROR;
const S32 LL_SCULPT_MESH_MAX_FACES = 8;
+extern BOOL gDebugGL;
+
class LLProfileParams
{
public:
@@ -953,6 +955,8 @@ public:
// mWeights.size() should be empty or match mVertices.size()
LLVector4a* mWeights;
+ mutable BOOL mWeightsScrubbed;
+
LLOctreeNode<LLVolumeTriangle>* mOctree;
//whether or not face has been cache optimized
diff --git a/indra/llmath/tests/v4math_test.cpp b/indra/llmath/tests/v4math_test.cpp
index 191ac864df..9779dfded3 100644
--- a/indra/llmath/tests/v4math_test.cpp
+++ b/indra/llmath/tests/v4math_test.cpp
@@ -355,7 +355,8 @@ namespace tut
val3 = z1 + (z2 - z1)* val;
val4 = w1 + (w2 - w1)* val;
LLVector4 vec4b = lerp(vec4,vec4a,val);
- ensure("lerp failed", ((val1 ==vec4b.mV[VX])&& (val2 ==vec4b.mV[VY]) && (val3 ==vec4b.mV[VZ])&& (val4 ==vec4b.mV[VW])));
+ LLVector4 check(val1, val2, val3, val4);
+ ensure_equals("lerp failed", check, vec4b);
}
template<> template<>
diff --git a/indra/llmath/v4coloru.h b/indra/llmath/v4coloru.h
index fddad34978..704ce852d9 100644
--- a/indra/llmath/v4coloru.h
+++ b/indra/llmath/v4coloru.h
@@ -47,14 +47,7 @@ class LLColor4U
{
public:
- union
- {
- U8 mV[LENGTHOFCOLOR4U];
- U32 mAll;
- LLColor4* mSources;
- LLColor4U* mSourcesU;
- };
-
+ U8 mV[LENGTHOFCOLOR4U];
LLColor4U(); // Initializes LLColor4U to (0, 0, 0, 1)
LLColor4U(U8 r, U8 g, U8 b); // Initializes LLColor4U to (r, g, b, 1)
@@ -132,6 +125,9 @@ public:
return LLColor4(*this);
}
+ U32 asRGBA() const;
+ void fromRGBA( U32 aVal );
+
static LLColor4U white;
static LLColor4U black;
static LLColor4U red;
@@ -565,6 +561,26 @@ void LLColor4U::setVecScaleClamp(const LLColor3& color)
mV[3] = 255;
}
+inline U32 LLColor4U::asRGBA() const
+{
+ // Little endian: values are swapped in memory. The original code accessed the array as a U32, so we need to swap the bytes here.
+
+ return (mV[3] << 24) | (mV[2] << 16) | (mV[1] << 8) | mV[0];
+}
+
+inline void LLColor4U::fromRGBA( U32 aVal )
+{
+ // Little endian: values are swapped in memory. The original code accessed the array as a U32, so we need to swap the bytes here.
+
+ mV[ 0 ] = aVal & 0xFF;
+ aVal >>= 8;
+ mV[ 1 ] = aVal & 0xFF;
+ aVal >>= 8;
+ mV[ 2 ] = aVal & 0xFF;
+ aVal >>= 8;
+ mV[ 3 ] = aVal & 0xFF;
+}
+
#endif
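A small usage sketch of the accessors added above: asRGBA() and fromRGBA() keep red in the least significant byte and alpha in the most significant byte, so a round trip preserves all four channels.

    LLColor4U c;
    c.mV[0] = 0x11;  c.mV[1] = 0x22;  c.mV[2] = 0x33;  c.mV[3] = 0x44;   // r, g, b, a
    U32 packed = c.asRGBA();       // 0x44332211: red in the low byte, alpha in the high byte
    LLColor4U d;
    d.fromRGBA(packed);            // d.mV now matches c.mV channel for channel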