Merge

author: Rider Linden <rider@lindenlab.com> 2017-01-25 11:13:59 -0800
committer: Rider Linden <rider@lindenlab.com> 2017-01-25 11:13:59 -0800
commit: 25cd2ed2052b0d30d61e8b81df151993983f561b (patch)
tree: 7f9b29356b658303bdf7608a25ccb23c8e6130da /indra/llmath
parent: 8d86f5a0c457fceff5923fb7ca32259b2718addc (diff)
parent: 68413474c4479eee9bdbeb34ea131475ba1d646e (diff)
3 files changed, 65 insertions, 16 deletions
diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index d141298f69..216334752a 100644
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -121,7 +121,7 @@ public:
 		res.add(z);
 	}
 
-	inline void affineTransform(const LLVector4a& v, LLVector4a& res)
+	inline void affineTransformSSE(const LLVector4a& v, LLVector4a& res)
 	{
 		LLVector4a x,y,z;
 
@@ -137,6 +137,43 @@ public:
 		z.add(mMatrix[3]);
 		res.setAdd(x,z);
 	}
+
+    inline void affineTransformNonSSE(const LLVector4a& v, LLVector4a& res) // Scalar (no intrinsics) transform of v by mMatrix; the result is written to res.
+    {
+        F32 x = v[0] * mMatrix[0][0] + v[1] * mMatrix[1][0] + v[2] * mMatrix[2][0] + mMatrix[3][0]; // v[3] is never read; mMatrix[3][0] is added unscaled
+        F32 y = v[0] * mMatrix[0][1] + v[1] * mMatrix[1][1] + v[2] * mMatrix[2][1] + mMatrix[3][1]; // same pattern, using element 1 of each matrix row
+        F32 z = v[0] * mMatrix[0][2] + v[1] * mMatrix[1][2] + v[2] * mMatrix[2][2] + mMatrix[3][2]; // same pattern, using element 2 of each matrix row
+        F32 w = 1.0f; // w is a fixed constant here; element 3 of the matrix rows is not consulted
+        res.set(x,y,z,w);
+    }
+
+	inline void affineTransform(const LLVector4a& v, LLVector4a& res) // Entry point for transforming v into res; currently delegates to the SSE variant.
+    {
+        affineTransformSSE(v,res); // affineTransformNonSSE is available with the same signature but is not called here
+    }
 };
 
+inline LLVector4a rowMul(const LLVector4a &row, const LLMatrix4a &mat) // Returns row[0]*mat.mMatrix[0] + row[1]*mat.mMatrix[1] + row[2]*mat.mMatrix[2] + row[3]*mat.mMatrix[3].
+{
+    LLVector4a result;
+    result = _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(0, 0, 0, 0)), mat.mMatrix[0]); // shuffle with one repeated index broadcasts row[0] to all four lanes
+    result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(1, 1, 1, 1)), mat.mMatrix[1])); // accumulate row[1] * matrix row 1
+    result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(2, 2, 2, 2)), mat.mMatrix[2])); // accumulate row[2] * matrix row 2
+    result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(3, 3, 3, 3)), mat.mMatrix[3])); // accumulate row[3] * matrix row 3
+    return result;
+}
+
+inline void matMul(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res) // Stores into res the matrix whose k-th row is rowMul(a.mMatrix[k], b).
+{
+    LLVector4a row0 = rowMul(a.mMatrix[0], b); // every output row is computed into a local before res is written
+    LLVector4a row1 = rowMul(a.mMatrix[1], b);
+    LLVector4a row2 = rowMul(a.mMatrix[2], b);
+    LLVector4a row3 = rowMul(a.mMatrix[3], b);
+
+    res.mMatrix[0] = row0; // NOTE(review): deferring the stores presumably lets res alias a or b safely - confirm against callers
+    res.mMatrix[1] = row1;
+    res.mMatrix[2] = row2;
+    res.mMatrix[3] = row3;
+}
+
 #endif
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index d932eb53a0..6f0b4b2410 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -2143,19 +2143,22 @@ BOOL LLVolume::generate()
 	
 	F32 profile_detail = mDetail;
 	F32 path_detail = mDetail;
-	
-	U8 path_type = mParams.getPathParams().getCurveType();
-	U8 profile_type = mParams.getProfileParams().getCurveType();
-	
-	if (path_type == LL_PCODE_PATH_LINE && profile_type == LL_PCODE_PROFILE_CIRCLE)
-	{ //cylinders don't care about Z-Axis
-		mLODScaleBias.setVec(0.6f, 0.6f, 0.0f);
-	}
-	else if (path_type == LL_PCODE_PATH_CIRCLE) 
-	{	
-		mLODScaleBias.setVec(0.6f, 0.6f, 0.6f);
+
+	if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) != LL_SCULPT_TYPE_MESH)
+	{
+		U8 path_type = mParams.getPathParams().getCurveType();
+		U8 profile_type = mParams.getProfileParams().getCurveType();
+		if (path_type == LL_PCODE_PATH_LINE && profile_type == LL_PCODE_PROFILE_CIRCLE)
+		{
+			//cylinders don't care about Z-Axis
+			mLODScaleBias.setVec(0.6f, 0.6f, 0.0f);
+		}
+		else if (path_type == LL_PCODE_PATH_CIRCLE)
+		{
+			mLODScaleBias.setVec(0.6f, 0.6f, 0.6f);
+		}
 	}
-	
+
 	BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split);
 	BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split);
 
@@ -2544,7 +2547,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 						U16 influence = weights[idx++];
 						influence |= ((U16) weights[idx++] << 8);
 
-						F32 w = llclamp((F32) influence / 65535.f, 0.f, 0.99999f);
+						F32 w = llclamp((F32) influence / 65535.f, 0.001f, 0.999f);
 						wght.mV[cur_influence] = w;
 						joints[cur_influence] = joint;
 						cur_influence++;
@@ -2561,11 +2564,15 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
                     F32 wsum = wght.mV[VX] + wght.mV[VY] + wght.mV[VZ] + wght.mV[VW];
                     if (wsum <= 0.f)
                     {
-                        wght = LLVector4(0.99999f,0.f,0.f,0.f);
+                        wght = LLVector4(0.999f,0.f,0.f,0.f);
                     }
                     for (U32 k=0; k<4; k++)
                     {
-                        joints_with_weights[k] = (F32) joints[k] + wght[k];
+                        F32 f_combined = (F32) joints[k] + wght[k];
+                        joints_with_weights[k] = f_combined;
+                        // Any weights we added above should wind up non-zero and applied to a specific bone.
+                        // A failure here would indicate a floating point precision error in the math.
+                        llassert((k >= cur_influence) || (f_combined - S32(f_combined) > 0.0f));
                     }
 					face.mWeights[cur_vertex].loadua(joints_with_weights.mV);
 
@@ -4568,6 +4575,7 @@ LLVolumeFace::LLVolumeFace() :
 	mTexCoords(NULL),
 	mIndices(NULL),
 	mWeights(NULL),
+    mWeightsScrubbed(FALSE),
 	mOctree(NULL),
 	mOptimized(FALSE)
 {
@@ -4593,6 +4601,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
 	mTexCoords(NULL),
 	mIndices(NULL),
 	mWeights(NULL),
+    mWeightsScrubbed(FALSE),
 	mOctree(NULL)
 { 
 	mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3);
@@ -4664,6 +4673,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 			ll_aligned_free_16(mWeights);
 			mWeights = NULL;
 		}
+        mWeightsScrubbed = src.mWeightsScrubbed;
 	}
 
 	if (mNumIndices)
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 1da2d0c6b1..d66004cdad 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -953,6 +953,8 @@ public:
 	// mWeights.size() should be empty or match mVertices.size()  
 	LLVector4a* mWeights;
 
+    mutable BOOL mWeightsScrubbed;
+    
 	LLOctreeNode<LLVolumeTriangle>* mOctree;
 
 	//whether or not face has been cache optimized
author	Rider Linden <rider@lindenlab.com>	2017-01-25 11:13:59 -0800
committer	Rider Linden <rider@lindenlab.com>	2017-01-25 11:13:59 -0800
commit	25cd2ed2052b0d30d61e8b81df151993983f561b (patch)
tree	7f9b29356b658303bdf7608a25ccb23c8e6130da /indra/llmath
parent	8d86f5a0c457fceff5923fb7ca32259b2718addc (diff)
parent	68413474c4479eee9bdbeb34ea131475ba1d646e (diff)