MAINT-6913 - (Via Sovereign Engineer and Shyotl Kuhr) Pack each joint's skinning matrix and translation into a single mat3x4, reducing uniform slot usage and making the data transfer more efficient.

author: Brad Payne (Vir Linden) <vir@lindenlab.com> 2016-11-11 09:37:45 -0500
committer: Brad Payne (Vir Linden) <vir@lindenlab.com> 2016-11-11 09:37:45 -0500
commit: 5fb30e5ad3615c0b0e9e67a94542dbb110e9ed95 (patch)
tree: 50e0341f2beec61836ffb79a31c05d24f761248a /indra/newview
parent: a6d9323c7266cf496bc1db6df8ce5bcb58f84ede (diff)
2 files changed, 26 insertions, 40 deletions
diff --git a/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl b/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl
index 8f754fe82b..90bf2851c9 100644
--- a/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl
+++ b/indra/newview/app_settings/shaders/class1/avatar/objectSkinV.glsl
@@ -24,11 +24,7 @@
 
 ATTRIBUTE vec4 weight4;  
 
-/* BENTO JOINT COUNT LIMITS
- * Note that the value in these two lines also needs to be updated to value-1 several places below.
- */
-uniform mat3 matrixPalette[MAX_JOINTS_PER_MESH_OBJECT];
-uniform vec3 translationPalette[MAX_JOINTS_PER_MESH_OBJECT];
+uniform mat3x4 matrixPalette[MAX_JOINTS_PER_MESH_OBJECT];
 
 mat4 getObjectSkinnedTransform()
 {
@@ -37,8 +33,8 @@ mat4 getObjectSkinnedTransform()
 	vec4 w = fract(weight4);
 	vec4 index = floor(weight4);
 	
-		 index = min(index, vec4(MAX_JOINTS_PER_MESH_OBJECT-1));
-		 index = max(index, vec4( 0.0));
+    index = min(index, vec4(MAX_JOINTS_PER_MESH_OBJECT-1));
+    index = max(index, vec4( 0.0));
 
     w *= 1.0/(w.x+w.y+w.z+w.w);
 	
@@ -46,16 +42,16 @@ mat4 getObjectSkinnedTransform()
 	int i2 = int(index.y);
 	int i3 = int(index.z);
 	int i4 = int(index.w);
-		
-	mat3 mat  = matrixPalette[i1]*w.x;
-		 mat += matrixPalette[i2]*w.y;
-		 mat += matrixPalette[i3]*w.z;
-		 mat += matrixPalette[i4]*w.w;
 
-	vec3 trans = translationPalette[i1]*w.x;
-	trans += translationPalette[i2]*w.y;
-	trans += translationPalette[i3]*w.z;
-	trans += translationPalette[i4]*w.w;
+	mat3 mat = mat3(matrixPalette[i1])*w.x;
+		 mat += mat3(matrixPalette[i2])*w.y;
+		 mat += mat3(matrixPalette[i3])*w.z;
+		 mat += mat3(matrixPalette[i4])*w.w;
+
+	vec3 trans = vec3(matrixPalette[i1][0].w,matrixPalette[i1][1].w,matrixPalette[i1][2].w)*w.x;
+		 trans += vec3(matrixPalette[i2][0].w,matrixPalette[i2][1].w,matrixPalette[i2][2].w)*w.y;
+		 trans += vec3(matrixPalette[i3][0].w,matrixPalette[i3][1].w,matrixPalette[i3][2].w)*w.z;
+		 trans += vec3(matrixPalette[i4][0].w,matrixPalette[i4][1].w,matrixPalette[i4][2].w)*w.w;
 
 	mat4 ret;
 
@@ -68,10 +64,8 @@ mat4 getObjectSkinnedTransform()
 
 #ifdef IS_AMD_CARD
    // If it's AMD make sure the GLSL compiler sees the arrays referenced once by static index. Otherwise it seems to optimise the storage awawy which leads to unfun crashes and artifacts.
-   mat3 dummy1 = matrixPalette[0];
-   vec3 dummy2 = translationPalette[0];
-   mat3 dummy3 = matrixPalette[MAX_JOINTS_PER_MESH_OBJECT-1];
-   vec3 dummy4 = translationPalette[MAX_JOINTS_PER_MESH_OBJECT-1];
+   mat3x4 dummy1 = matrixPalette[0];
+   mat3x4 dummy2 = matrixPalette[MAX_JOINTS_PER_MESH_OBJECT-1];
 #endif
 
 }
diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
index 517c69305a..10311044d2 100644
--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@@ -1708,43 +1708,35 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 
 				stop_glerror();
 
-				F32 mp[LL_MAX_JOINTS_PER_MESH_OBJECT*9];
-
-				F32 transp[LL_MAX_JOINTS_PER_MESH_OBJECT*3];
+				F32 mp[LL_MAX_JOINTS_PER_MESH_OBJECT*12];
 
 				for (U32 i = 0; i < count; ++i)
 				{
 					F32* m = (F32*) mat[i].mMatrix[0].getF32ptr();
 
-					U32 idx = i*9;
+					U32 idx = i*12;
 
 					mp[idx+0] = m[0];
 					mp[idx+1] = m[1];
 					mp[idx+2] = m[2];
+					mp[idx+3] = m[12];
 
-					mp[idx+3] = m[4];
-					mp[idx+4] = m[5];
-					mp[idx+5] = m[6];
-
-					mp[idx+6] = m[8];
-					mp[idx+7] = m[9];
-					mp[idx+8] = m[10];
-
-					idx = i*3;
+					mp[idx+4] = m[4];
+					mp[idx+5] = m[5];
+					mp[idx+6] = m[6];
+					mp[idx+7] = m[13];
 
-					transp[idx+0] = m[12];
-					transp[idx+1] = m[13];
-					transp[idx+2] = m[14];
+					mp[idx+8] = m[8];
+					mp[idx+9] = m[9];
+					mp[idx+10] = m[10];
+					mp[idx+11] = m[14];
 				}
 
-				LLDrawPoolAvatar::sVertexProgram->uniformMatrix3fv(LLViewerShaderMgr::AVATAR_MATRIX, 
+				LLDrawPoolAvatar::sVertexProgram->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX, 
 					count,
 					FALSE,
 					(GLfloat*) mp);
 
-				LLDrawPoolAvatar::sVertexProgram->uniform3fv(LLShaderMgr::AVATAR_TRANSLATION, count, transp);
-
-				
 				stop_glerror();
 			}
 			else
author	Brad Payne (Vir Linden) <vir@lindenlab.com>	2016-11-11 09:37:45 -0500
committer	Brad Payne (Vir Linden) <vir@lindenlab.com>	2016-11-11 09:37:45 -0500
commit	5fb30e5ad3615c0b0e9e67a94542dbb110e9ed95 (patch)
tree	50e0341f2beec61836ffb79a31c05d24f761248a /indra/newview
parent	a6d9323c7266cf496bc1db6df8ce5bcb58f84ede (diff)