5 files changed, 230 insertions, 53 deletions
diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
index 15a0595179..789a254389 100644
--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@@ -38,6 +38,7 @@
 #include "lldrawable.h"
 #include "lldrawpoolbump.h"
 #include "llface.h"
+#include "llvolume.h"
 #include "llmeshrepository.h"
 #include "llsky.h"
 #include "llviewercamera.h"
@@ -1833,15 +1834,13 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
     LLFace* face,
     const LLMeshSkinInfo* skin,
     LLVolume* volume,
-    const LLVolumeFace& vol_face)
+    LLVolumeFace& vol_face)
 {
 	LLVector4a* weights = vol_face.mWeights;
 	if (!weights)
 	{
 		return;
 	}
-    // FIXME ugly const cast
-    LLSkinningUtil::scrubInvalidJoints(avatar, const_cast<LLMeshSkinInfo*>(skin));
 
 	LLPointer<LLVertexBuffer> buffer = face->getVertexBuffer();
 	LLDrawable* drawable = face->getDrawable();
@@ -1851,6 +1850,48 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 		return;
 	}
 
+    const U32 max_joints = LLSkinningUtil::getMaxJointCount();
+
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+    #define CONDITION_WEIGHT(f) ((U8)llclamp((S32)f, (S32)0, (S32)max_joints-1))
+    LLVector4a* just_weights = vol_face.mJustWeights;
+    // we need to calculate the separated indices and store just the matrix weights for this vol...
+    if (!vol_face.mJointIndices)
+    {
+        // not very consty after all...
+        vol_face.allocateJointIndices(vol_face.mNumVertices);
+        just_weights = vol_face.mJustWeights;
+
+        U8* joint_indices_cursor = vol_face.mJointIndices;
+        for (int i = 0; i < vol_face.mNumVertices; i++)
+        {
+            F32* w = weights[i].getF32ptr();
+            F32* w_ = just_weights[i].getF32ptr();
+
+            F32 w0 = floorf(w[0]);
+            F32 w1 = floorf(w[1]);
+            F32 w2 = floorf(w[2]);
+            F32 w3 = floorf(w[3]);
+
+            joint_indices_cursor[0] = CONDITION_WEIGHT(w0);
+            joint_indices_cursor[1] = CONDITION_WEIGHT(w1);
+            joint_indices_cursor[2] = CONDITION_WEIGHT(w2);
+            joint_indices_cursor[3] = CONDITION_WEIGHT(w3);
+
+            // remove joint portion of combined weight
+            w_[0] = w[0] - w0;
+            w_[1] = w[1] - w1;
+            w_[2] = w[2] - w2;
+            w_[3] = w[3] - w3;
+
+            joint_indices_cursor += 4;
+        }
+    }
+#endif
+
+    // FIXME ugly const cast
+    LLSkinningUtil::scrubInvalidJoints(avatar, const_cast<LLMeshSkinInfo*>(skin));
+
 	U32 data_mask = face->getRiggedVertexBufferDataMask();
 
     if (!vol_face.mWeightsScrubbed)
@@ -1927,29 +1968,67 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 		LLMatrix4a bind_shape_matrix;
 		bind_shape_matrix.loadu(skin->mBindShapeMatrix);
 
-        const U32 max_joints = LLSkinningUtil::getMaxJointCount();
-		for (U32 j = 0; j < buffer->getNumVerts(); ++j)
-		{
-			LLMatrix4a final_mat;
-            LLSkinningUtil::getPerVertexSkinMatrix(weights[j].getF32ptr(), mat, false, final_mat, max_joints);
-			
-			LLVector4a& v = vol_face.mPositions[j];
-
-			LLVector4a t;
-			LLVector4a dst;
-			bind_shape_matrix.affineTransform(v, t);
-			final_mat.affineTransform(t, dst);
-			pos[j] = dst;
-
-			if (norm)
-			{
-				LLVector4a& n = vol_face.mNormals[j];
-				bind_shape_matrix.rotate(n, t);
-				final_mat.rotate(t, dst);
-				dst.normalize3fast();
-				norm[j] = dst;
-			}
-		}
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+        U8* joint_indices_cursor = vol_face.mJointIndices;
+        // fast path with joint indices separate from weights
+        if (joint_indices_cursor)
+        {
+            LLMatrix4a src[4];
+		    for (U32 j = 0; j < buffer->getNumVerts(); ++j)
+		    {
+			    LLMatrix4a final_mat;
+                //LLMatrix4a final_mat_correct;
+
+                F32* jw = just_weights[j].getF32ptr();
+
+                LLSkinningUtil::getPerVertexSkinMatrixWithIndices(jw, joint_indices_cursor, mat, final_mat, src);                
+
+                joint_indices_cursor += 4;
+
+			    LLVector4a& v = vol_face.mPositions[j];
+
+			    LLVector4a t;
+			    LLVector4a dst;
+			    bind_shape_matrix.affineTransform(v, t);
+			    final_mat.affineTransform(t, dst);
+			    pos[j] = dst;
+
+			    if (norm)
+			    {
+				    LLVector4a& n = vol_face.mNormals[j];
+				    bind_shape_matrix.rotate(n, t);
+				    final_mat.rotate(t, dst);
+				    dst.normalize3fast();
+				    norm[j] = dst;
+			    }
+		    }
+        }
+        // slow path with joint indices calculated from weights
+        else
+#endif
+        {
+            for (U32 j = 0; j < buffer->getNumVerts(); ++j)
+		    {
+			    LLMatrix4a final_mat;
+                LLSkinningUtil::getPerVertexSkinMatrix(weights[j].getF32ptr(), mat, false, final_mat, max_joints);
+
+			    LLVector4a& v = vol_face.mPositions[j];
+			    LLVector4a t;
+			    LLVector4a dst;
+			    bind_shape_matrix.affineTransform(v, t);
+			    final_mat.affineTransform(t, dst);
+			    pos[j] = dst;
+
+			    if (norm)
+			    {
+				    LLVector4a& n = vol_face.mNormals[j];
+				    bind_shape_matrix.rotate(n, t);
+				    final_mat.rotate(t, dst);
+				    //dst.normalize3fast();
+				    norm[j] = dst;
+			    }
+		    }
+        }
 	}
 }
 
@@ -2301,7 +2380,7 @@ void LLDrawPoolAvatar::updateRiggedVertexBuffers(LLVOAvatar* avatar)
 
 			stop_glerror();
 
-			const LLVolumeFace& vol_face = volume->getVolumeFace(te);
+			LLVolumeFace& vol_face = volume->getVolumeFace(te);
 			updateRiggedFaceVertexBuffer(avatar, face, skin, volume, vol_face);
 		}
 	}
diff --git a/indra/newview/lldrawpoolavatar.h b/indra/newview/lldrawpoolavatar.h
index e8add0e1d8..cb09eb18e2 100644
--- a/indra/newview/lldrawpoolavatar.h
+++ b/indra/newview/lldrawpoolavatar.h
@@ -257,7 +257,7 @@ typedef enum
 									  LLFace* facep, 
 									  const LLMeshSkinInfo* skin, 
 									  LLVolume* volume,
-									  const LLVolumeFace& vol_face);
+									  LLVolumeFace& vol_face);
 	void updateRiggedVertexBuffers(LLVOAvatar* avatar);
 
 	void renderRigged(LLVOAvatar* avatar, U32 type, bool glow = false);
diff --git a/indra/newview/llskinningutil.cpp b/indra/newview/llskinningutil.cpp
index 0fa4c2b114..83b9c8971a 100644
--- a/indra/newview/llskinningutil.cpp
+++ b/indra/newview/llskinningutil.cpp
@@ -34,8 +34,12 @@
 #include "llvolume.h"
 #include "llrigginginfo.h"
 
+#define DEBUG_SKINNING  LL_DEBUG
+#define MAT_USE_SSE     1
+
 void dump_avatar_and_skin_state(const std::string& reason, LLVOAvatar *avatar, const LLMeshSkinInfo *skin)
 {
+#if DEBUG_SKINNING
     static S32 dump_count = 0;
     const S32 max_dump = 10;
 
@@ -81,16 +85,16 @@ void dump_avatar_and_skin_state(const std::string& reason, LLVOAvatar *avatar, c
 
         dump_count++;
     }
+#endif
 }
 
 void LLSkinningUtil::initClass()
 {
 }
 
-U32 LLSkinningUtil::getMaxJointCount()
+S32 LLSkinningUtil::getMaxJointCount()
 {
-    U32 result = LL_MAX_JOINTS_PER_MESH_OBJECT;
-	return result;
+    return (S32)LL_MAX_JOINTS_PER_MESH_OBJECT;
 }
 
 U32 LLSkinningUtil::getMeshJointCount(const LLMeshSkinInfo *skin)
@@ -120,6 +124,8 @@ void LLSkinningUtil::scrubInvalidJoints(LLVOAvatar *avatar, LLMeshSkinInfo* skin
     skin->mInvalidJointsScrubbed = true;
 }
 
+#define MAT_USE_SSE 1
+
 void LLSkinningUtil::initSkinningMatrixPalette(
     LLMatrix4* mat,
     S32 count, 
@@ -130,9 +136,9 @@ void LLSkinningUtil::initSkinningMatrixPalette(
     for (U32 j = 0; j < count; ++j)
     {
         LLJoint *joint = avatar->getJoint(skin->mJointNums[j]);
+        llassert(joint);
         if (joint)
         {
-#define MAT_USE_SSE
 #ifdef MAT_USE_SSE
             LLMatrix4a bind, world, res;
             bind.loadu(skin->mInvBindMatrix[j]);
@@ -147,6 +153,7 @@ void LLSkinningUtil::initSkinningMatrixPalette(
         else
         {
             mat[j] = skin->mInvBindMatrix[j];
+#if DEBUG_SKINNING
             // This  shouldn't  happen   -  in  mesh  upload,  skinned
             // rendering  should  be disabled  unless  all joints  are
             // valid.  In other  cases of  skinned  rendering, invalid
@@ -157,16 +164,15 @@ void LLSkinningUtil::initSkinningMatrixPalette(
             LL_WARNS_ONCE("Avatar") << avatar->getFullname() 
                                     << " avatar build state: isBuilt() " << avatar->isBuilt() 
                                     << " mInitFlags " << avatar->mInitFlags << LL_ENDL;
-#if 0
-            dump_avatar_and_skin_state("initSkinningMatrixPalette joint not found", avatar, skin);
 #endif
+            dump_avatar_and_skin_state("initSkinningMatrixPalette joint not found", avatar, skin);
         }
     }
 }
 
 void LLSkinningUtil::checkSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin)
 {
-#ifdef SHOW_ASSERT                  // same condition that controls llassert()
+#if DEBUG_SKINNING
 	const S32 max_joints = skin->mJointNames.size();
     for (U32 j=0; j<num_vertices; j++)
     {
@@ -265,6 +271,7 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
     {
         for (U32 j = 0; j < skin->mJointNames.size(); ++j)
         {
+    #if DEBUG_SKINNING     
             LLJoint *joint = NULL;
             if (skin->mJointNums[j] == -1)
             {
@@ -282,11 +289,16 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
                 {
                     LL_WARNS_ONCE("Avatar") << avatar->getFullname() << " unable to find joint " << skin->mJointNames[j] << LL_ENDL;
                     LL_WARNS_ONCE("Avatar") << avatar->getFullname() << " avatar build state: isBuilt() " << avatar->isBuilt() << " mInitFlags " << avatar->mInitFlags << LL_ENDL;
-#if 0
                     dump_avatar_and_skin_state("initJointNums joint not found", avatar, skin);
-#endif
+                    skin->mJointNums[j] = 0;
                 }
             }
+    #else
+            LLJoint *joint = (skin->mJointNums[j] == -1) ? avatar->getJoint(skin->mJointNames[j]) : avatar->getJoint(skin->mJointNums[j]);
+            skin->mJointNums[j] = joint ? joint->getJointNum() : 0;            
+    #endif
+            // insure we have *a* valid joint to reference
+            llassert(skin->mJointNums[j] >= 0);
         }
         skin->mJointNumsInitialized = true;
     }
@@ -344,14 +356,17 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
 
                                 // FIXME could precompute these matMuls.
                                 LLMatrix4a bind_shape;
-                                bind_shape.loadu(skin->mBindShapeMatrix);
                                 LLMatrix4a inv_bind;
-                                inv_bind.loadu(skin->mInvBindMatrix[joint_index]);
                                 LLMatrix4a mat;
-                                matMul(bind_shape, inv_bind, mat);
                                 LLVector4a pos_joint_space;
+
+                                bind_shape.loadu(skin->mBindShapeMatrix);
+                                inv_bind.loadu(skin->mInvBindMatrix[joint_index]);
+                                matMul(bind_shape, inv_bind, mat);
+
                                 mat.affineTransform(pos, pos_joint_space);
                                 pos_joint_space.mul(wght[k]);
+
                                 LLVector4a *extents = rig_info_tab[joint_num].getRiggedExtents();
                                 update_min_max(extents[0], extents[1], pos_joint_space);
                             }
@@ -366,6 +381,8 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
                 vol_face.mJointRiggingInfoTab.setNeedsUpdate(false);
             }
         }
+
+#if DEBUG_SKINNING
         if (vol_face.mJointRiggingInfoTab.size()!=0)
         {
             LL_DEBUGS("RigSpammish") << "we have rigging info for vf " << &vol_face 
@@ -376,10 +393,40 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
             LL_DEBUGS("RigSpammish") << "no rigging info for vf " << &vol_face 
                                      << " num_verts " << vol_face.mNumVertices << LL_ENDL; 
         }
+#endif
 
     }
 }
 
+void LLSkinningUtil::updateRiggingInfo_(LLMeshSkinInfo* skin, LLVOAvatar *avatar, S32 num_verts, LLVector4a* weights, LLVector4a* positions, U8* joint_indices, LLJointRiggingInfoTab &rig_info_tab)
+{
+    LL_RECORD_BLOCK_TIME(FTM_FACE_RIGGING_INFO);
+    for (S32 i=0; i < num_verts; i++)
+    {
+        LLVector4a& pos  = positions[i];
+        LLVector4a& wght = weights[i];
+        for (U32 k=0; k<4; ++k)
+        {
+            S32 joint_num = skin->mJointNums[joint_indices[k]];
+            llassert(joint_num >= 0 && joint_num < LL_CHARACTER_MAX_ANIMATED_JOINTS);
+            {
+                rig_info_tab[joint_num].setIsRiggedTo(true);
+                LLMatrix4a bind_shape;
+                bind_shape.loadu(skin->mBindShapeMatrix);
+                LLMatrix4a inv_bind;
+                inv_bind.loadu(skin->mInvBindMatrix[joint_indices[k]]);
+                LLMatrix4a mat;
+                matMul(bind_shape, inv_bind, mat);
+                LLVector4a pos_joint_space;
+                mat.affineTransform(pos, pos_joint_space);
+                pos_joint_space.mul(wght[k]);
+                LLVector4a *extents = rig_info_tab[joint_num].getRiggedExtents();
+                update_min_max(extents[0], extents[1], pos_joint_space);
+            }
+        }
+    }
+}
+
 // This is used for extracting rotation from a bind shape matrix that
 // already has scales baked in
 LLQuaternion LLSkinningUtil::getUnscaledQuaternion(const LLMatrix4& mat4)
diff --git a/indra/newview/llskinningutil.h b/indra/newview/llskinningutil.h
index ccc501adc0..d39356451d 100644
--- a/indra/newview/llskinningutil.h
+++ b/indra/newview/llskinningutil.h
@@ -27,23 +27,48 @@
 #ifndef LLSKINNINGUTIL_H
 #define LLSKINNINGUTIL_H
 
+#include "v2math.h"
+#include "v4math.h"
+#include "llvector4a.h"
+#include "llmatrix4a.h"
+
 class LLVOAvatar;
 class LLMeshSkinInfo;
-class LLMatrix4a;
 class LLVolumeFace;
+class LLJointRiggingInfoTab;
 
 namespace LLSkinningUtil
 {
     void initClass();
-    U32 getMaxJointCount();
+    S32 getMaxJointCount();
     U32 getMeshJointCount(const LLMeshSkinInfo *skin);
     void scrubInvalidJoints(LLVOAvatar *avatar, LLMeshSkinInfo* skin);
     void initSkinningMatrixPalette(LLMatrix4* mat, S32 count, const LLMeshSkinInfo* skin, LLVOAvatar *avatar);
     void checkSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin);
     void scrubSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin);
     void getPerVertexSkinMatrix(F32* weights, LLMatrix4a* mat, bool handle_bad_scale, LLMatrix4a& final_mat, U32 max_joints);
+
+    LL_FORCE_INLINE void getPerVertexSkinMatrixWithIndices(
+        F32*        weights,
+        U8*         idx,
+        LLMatrix4a* mat,
+        LLMatrix4a& final_mat,
+        LLMatrix4a* src)
+    {    
+        final_mat.clear();
+        src[0].setMul(mat[idx[0]], weights[0]);
+        src[1].setMul(mat[idx[1]], weights[1]);
+        final_mat.add(src[0]);
+        final_mat.add(src[1]);
+        src[2].setMul(mat[idx[2]], weights[2]);        
+        src[3].setMul(mat[idx[3]], weights[3]);
+        final_mat.add(src[2]);
+        final_mat.add(src[3]);
+    }
+
     void initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar);
     void updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *avatar, LLVolumeFace& vol_face);
+    void updateRiggingInfo_(LLMeshSkinInfo* skin, LLVOAvatar *avatar, S32 num_verts, LLVector4a* weights, LLVector4a* positions, U8* joint_indices, LLJointRiggingInfoTab &rig_info_tab);
 	LLQuaternion getUnscaledQuaternion(const LLMatrix4& mat4);
 };
 
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 02ef7612a7..706e2c6895 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -4787,18 +4787,44 @@ void LLRiggedVolume::update(const LLMeshSkinInfo* skin, LLVOAvatar* avatar, cons
                 U32 max_joints = LLSkinningUtil::getMaxJointCount();
                 rigged_vert_count += dst_face.mNumVertices;
                 rigged_face_count++;
-				for (U32 j = 0; j < dst_face.mNumVertices; ++j)
-				{
-					LLMatrix4a final_mat;
-                    LLSkinningUtil::getPerVertexSkinMatrix(weight[j].getF32ptr(), mat, false, final_mat, max_joints);
+
+            #if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+                if (vol_face.mJointIndices) // fast path with preconditioned joint indices
+                {
+                    LLMatrix4a src[4];
+                    U8* joint_indices_cursor = vol_face.mJointIndices;
+                    LLVector4a* just_weights = vol_face.mJustWeights;
+                    for (U32 j = 0; j < dst_face.mNumVertices; ++j)
+				    {
+					    LLMatrix4a final_mat;
+                        F32* w = just_weights[j].getF32ptr();
+                        LLSkinningUtil::getPerVertexSkinMatrixWithIndices(w, joint_indices_cursor, mat, final_mat, src);
+                        joint_indices_cursor += 4;
+
+					    LLVector4a& v = vol_face.mPositions[j];
+					    LLVector4a t;
+					    LLVector4a dst;
+					    bind_shape_matrix.affineTransform(v, t);
+					    final_mat.affineTransform(t, dst);
+					    pos[j] = dst;
+				    }
+                }
+                else
+            #endif
+                {
+				    for (U32 j = 0; j < dst_face.mNumVertices; ++j)
+				    {
+					    LLMatrix4a final_mat;
+                        LLSkinningUtil::getPerVertexSkinMatrix(weight[j].getF32ptr(), mat, false, final_mat, max_joints);
 				
-					LLVector4a& v = vol_face.mPositions[j];
-					LLVector4a t;
-					LLVector4a dst;
-					bind_shape_matrix.affineTransform(v, t);
-					final_mat.affineTransform(t, dst);
-					pos[j] = dst;
-				}
+					    LLVector4a& v = vol_face.mPositions[j];
+					    LLVector4a t;
+					    LLVector4a dst;
+					    bind_shape_matrix.affineTransform(v, t);
+					    final_mat.affineTransform(t, dst);
+					    pos[j] = dst;
+				    }
+                }
 
 				//update bounding box
 				// VFExtents change