From 5460c0f4c80660f93723a80d464c0a5ebd97921a Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)" <vir@lindenlab.com>
Date: Mon, 28 Sep 2015 11:53:01 -0400
Subject: SL-227 WIP - standardize usage of shared skinning code and handle
 additional error case in weights.

---
 indra/newview/lldrawpoolavatar.cpp      | 23 +++++++++++++++++++----
 indra/newview/llfloatermodelpreview.cpp | 32 ++++++++++++++++++--------------
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
index 76c89865a5..b3821fda85 100755
--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@@ -1544,6 +1544,8 @@ void LLDrawPoolAvatar::initSkinningMatrixPalette(
     const LLMeshSkinInfo* skin,
     LLVOAvatar *avatar)
 {
+    // BENTO - switching to use Matrix4a and SSE might speed this up.
+    // Note that we are mostly passing Matrix4a's to this routine anyway, just dubiously cast to LLMatrix4*.
     for (U32 j = 0; j < count; ++j)
     {
         LLJoint* joint = avatar->getJoint(skin->mJointNames[j]);
@@ -1557,6 +1559,14 @@ void LLDrawPoolAvatar::initSkinningMatrixPalette(
             mat[j] *= joint->getWorldMatrix();
         }
     }
+    // This handles a bogus weights case that has turned up in
+    // practice, without the overhead of zeroing every matrix.  We are
+    // doing this here instead of in getPerVertexSkinMatrix so the fix
+    // will also work in the HW skinning case.
+    if (count < LL_MAX_JOINTS_PER_MESH_OBJECT)
+    {
+        mat[count].setIdentity();
+    }
 }
 
 // static
@@ -1573,6 +1583,12 @@ void LLDrawPoolAvatar::getPerVertexSkinMatrix(F32* weights, LLMatrix4a* mat, boo
     {
         F32 w = weights[k];
 
+        // BENTO potential optimizations
+        // - Do clamping in unpackVolumeFaces() (once instead of every time)
+        // - int vs floor: if we know w is >= 0.0, a plain (S32) cast
+        //   truncates to the same value as floorf; the latter
+        //   allegedly has a lot of overhead due to ieeefp error
+        //   checking which we should not need.
         idx[k] = llclamp((S32) floorf(w), (S32)0, (S32)LL_MAX_JOINTS_PER_MESH_OBJECT-1);
 
         wght[k] = w - floorf(w);
@@ -1670,7 +1686,6 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 		//build matrix palette
 		LLMatrix4a mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
         U32 count = llmin((U32) skin->mJointNames.size(), (U32) LL_MAX_JOINTS_PER_MESH_OBJECT);
-
         initSkinningMatrixPalette((LLMatrix4*)mat, count, skin, avatar);
 
 		LLMatrix4a bind_shape_matrix;
@@ -1761,9 +1776,9 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 			if (sShaderLevel > 0)
 			{
                 // upload matrix palette to shader
-				LLMatrix4 mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
+				LLMatrix4a mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
 				U32 count = llmin((U32) skin->mJointNames.size(), (U32) LL_MAX_JOINTS_PER_MESH_OBJECT);
-                initSkinningMatrixPalette(mat, count, skin, avatar);
+                initSkinningMatrixPalette((LLMatrix4*)mat, count, skin, avatar);
 
 				stop_glerror();
 
@@ -1773,7 +1788,7 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 
 				for (U32 i = 0; i < count; ++i)
 				{
-					F32* m = (F32*) mat[i].mMatrix;
+					F32* m = (F32*) mat[i].mMatrix[0].getF32ptr();
 
 					U32 idx = i*9;
 
diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp
index 7e5d3a9f29..0736f1eac4 100755
--- a/indra/newview/llfloatermodelpreview.cpp
+++ b/indra/newview/llfloatermodelpreview.cpp
@@ -5573,26 +5573,30 @@ BOOL LLModelPreview::render()
 
 							//build matrix palette
 
-							LLMatrix4 mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
-                            U32 count = llmin((U32) model->mSkinInfo.mJointNames.size(), (U32) LL_MAX_JOINTS_PER_MESH_OBJECT);
-                            LLDrawPoolAvatar::initSkinningMatrixPalette(mat, count, &model->mSkinInfo, getPreviewAvatar());
-
+							LLMatrix4a mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
+                            const LLMeshSkinInfo *skin = &model->mSkinInfo;
+                            U32 count = llmin((U32) skin->mJointNames.size(), (U32) LL_MAX_JOINTS_PER_MESH_OBJECT);
+                            LLDrawPoolAvatar::initSkinningMatrixPalette((LLMatrix4*)mat, count,
+                                                                        skin, getPreviewAvatar());
+                            LLMatrix4a bind_shape_matrix;
+                            bind_shape_matrix.loadu(skin->mBindShapeMatrix);
 							for (U32 j = 0; j < buffer->getNumVerts(); ++j)
 							{
-                                LLMatrix4a final_mata;
-                                LLDrawPoolAvatar::getPerVertexSkinMatrix(weight[j].mV, (LLMatrix4a*)mat, true, final_mata);
-
-                                // BENTO GROSS KLUDGERY
-								LLMatrix4 final_mat;
-                                memcpy(&final_mat,&final_mata,sizeof(LLMatrix4a));
+                                LLMatrix4a final_mat;
+                                F32 *wptr = weight[j].mV;
+                                LLDrawPoolAvatar::getPerVertexSkinMatrix(wptr, mat, true, final_mat);
 
 								//VECTORIZE THIS
-								LLVector3 v(face.mPositions[j].getF32ptr());
+                                LLVector4a& v = face.mPositions[j];
 
-								v = v * model->mSkinInfo.mBindShapeMatrix;
-								v = v * final_mat;
+                                LLVector4a t;
+                                LLVector4a dst;
+                                bind_shape_matrix.affineTransform(v, t);
+                                final_mat.affineTransform(t, dst);
 
-								position[j] = v;
+								position[j][0] = dst[0];
+								position[j][1] = dst[1];
+								position[j][2] = dst[2];
 							}
 
 							llassert(model->mMaterialList.size() > i);
-- 
cgit v1.2.3