From 095a5e84408b47ef3c5610e111aefe51d77633ca Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Sat, 6 Feb 2010 17:33:12 -0600
Subject: Draw prims using triangle strips instead of triangle lists.

---
 indra/newview/llviewerjointmesh.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 1a67fc0966..7225aa1523 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -626,7 +626,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 		mFace->mVertexBuffer->drawRange(LLRender::TRIANGLES, start, end, count, offset);
 		glPopMatrix();
 	}
-	gPipeline.addTrianglesDrawn(count/3);
+	gPipeline.addTrianglesDrawn(count);
 
 	triangle_count += count;
 	
-- 
cgit v1.2.3


From 38158f0e14663ac73c7ed79723ba6e34a1253e2a Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Thu, 18 Feb 2010 23:04:16 -0600
Subject: Model preview now loads materials.

---
 indra/newview/llviewerjointmesh.cpp | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 7225aa1523..92029d10f6 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -561,12 +561,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 		}
 		else
 		{
-			// This warning will always trigger if you've hacked the avatar to show as incomplete.
-			// Ignore the warning if that's the case.
-			if (!gSavedSettings.getBOOL("RenderUnloadedAvatar"))
-			{
-				//llwarns << "Layerset without composite" << llendl;
-			}
 			gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
 		}
 	}
-- 
cgit v1.2.3


From 71b0a63c8df29d5d69b777306dcf6280fd98886a Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Fri, 16 Apr 2010 13:00:01 -0500
Subject: Optimize LLViewerJointMesh::updateFaceData

---
 indra/newview/llviewerjointmesh.cpp | 88 +++++++++++++++++++++++++++++--------
 1 file changed, 69 insertions(+), 19 deletions(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 92029d10f6..90a5a29bb4 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -661,6 +661,8 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32
 //-----------------------------------------------------------------------------
 // updateFaceData()
 //-----------------------------------------------------------------------------
+static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face");
+
 void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind)
 {
 	mFace = face;
@@ -670,6 +672,8 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 		return;
 	}
 
+	LLFastTimer t(FTM_AVATAR_FACE);
+
 	LLStrider<LLVector3> verticesp;
 	LLStrider<LLVector3> normalsp;
 	LLStrider<LLVector2> tex_coordsp;
@@ -688,30 +692,76 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 			face->mVertexBuffer->getIndexStrider(indicesp);
 			stop_glerror();
 
-			for (U16 i = 0; i < mMesh->getNumVertices(); i++)
+			verticesp += mMesh->mFaceVertexOffset;
+			tex_coordsp += mMesh->mFaceVertexOffset;
+			normalsp += mMesh->mFaceVertexOffset;
+			vertex_weightsp += mMesh->mFaceVertexOffset;
+			clothing_weightsp += mMesh->mFaceVertexOffset;
+
+			U32* __restrict v = (U32*) verticesp.get();
+			const U32 vert_skip = verticesp.getSkip()/sizeof(U32);
+
+			U32* __restrict tc = (U32*) tex_coordsp.get();
+			const U32 tc_skip = tex_coordsp.getSkip()/sizeof(U32);
+
+			U32* __restrict n = (U32*) normalsp.get();
+			const U32 n_skip = normalsp.getSkip()/sizeof(U32);
+			
+			U32* __restrict vw = (U32*) vertex_weightsp.get();
+			const U32 vw_skip = vertex_weightsp.getSkip()/sizeof(U32);
+
+
+			U32* __restrict cw = (U32*) clothing_weightsp.get();
+			const U32 cw_skip = vertex_weightsp.getSkip()/sizeof(U32);
+
+			const U32* __restrict coords = (U32*) mMesh->getCoords();
+			const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords();
+			const U32* __restrict normals = (U32*) mMesh->getNormals();
+			const U32* __restrict weights = (U32*) mMesh->getWeights();
+			const U32* __restrict cloth_weights = (U32*) mMesh->getClothingWeights();
+
+			const U32 num_verts = mMesh->getNumVertices();
+
+			U32 i = 0;
+			do
 			{
-				verticesp[mMesh->mFaceVertexOffset + i] = *(mMesh->getCoords() + i);
-				tex_coordsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getTexCoords() + i);
-				normalsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getNormals() + i);
-				vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i);
-				if (damp_wind)
-				{
-					clothing_weightsp[mMesh->mFaceVertexOffset + i] = LLVector4(0,0,0,0);
-				}
-				else
-				{
-					clothing_weightsp[mMesh->mFaceVertexOffset + i] = (*(mMesh->getClothingWeights() + i));
-				}
+				v[0] = *(coords++); 
+				v[1] = *(coords++); 
+				v[2] = *(coords++);
+				v += vert_skip;
+
+				tc[0] = *(tex_coords++); 
+				tc[1] = *(tex_coords++);
+				tc += tc_skip;
+
+				n[0] = *(normals++); 
+				n[1] = *(normals++);
+				n[2] = *(normals++);
+				n += n_skip;
+
+				vw[0] = *(weights++);
+				vw += vw_skip;
+
+				cw[0] = *(cloth_weights++);
+				cw[1] = *(cloth_weights++);
+				cw[2] = *(cloth_weights++);
+				cw[3] = *(cloth_weights++);
+				cw += cw_skip;
 			}
+			while (++i < num_verts);
+
+			const U32 idx_count = mMesh->getNumFaces()*3;
 
-			for (S32 i = 0; i < mMesh->getNumFaces(); i++)
+			U16* __restrict idx = indicesp.get();
+			S32* __restrict src_idx = (S32*) mMesh->getFaces();
+
+			i = 0;
+
+			do
 			{
-				for (U32 j = 0; j < 3; j++)
-				{
-					U32 k = i*3+j+mMesh->mFaceIndexOffset;
-					indicesp[k] = mMesh->getFaces()[i][j] + mMesh->mFaceVertexOffset;
-				}
+				*(idx++) = *(src_idx++);
 			}
+			while (++i < idx_count);
 		}
 	}
 }
-- 
cgit v1.2.3


From 12499cebcba81175ae5d92926f5ec89632f00926 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Fri, 16 Apr 2010 15:53:26 -0500
Subject: Fix for busted optimizations.

---
 indra/newview/llviewerjointmesh.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 90a5a29bb4..db2279d925 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -752,14 +752,18 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 
 			const U32 idx_count = mMesh->getNumFaces()*3;
 
+			indicesp += mMesh->mFaceIndexOffset;
+
 			U16* __restrict idx = indicesp.get();
 			S32* __restrict src_idx = (S32*) mMesh->getFaces();
 
 			i = 0;
 
+			const S32 offset = (S32) mMesh->mFaceVertexOffset;
+
 			do
 			{
-				*(idx++) = *(src_idx++);
+				*(idx++) = *(src_idx++)+offset;
 			}
 			while (++i < idx_count);
 		}
-- 
cgit v1.2.3


From e994b9dcee82b510dc881e2b14d053a27fe35472 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Mon, 19 Apr 2010 23:33:34 -0500
Subject: Remove foot shadows from llvoavatar and add terse update to
 LLViewerJointMesh::updateFaceData.

---
 indra/newview/llviewerjointmesh.cpp | 121 +++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 51 deletions(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index db2279d925..fb6cc8d790 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -663,7 +663,7 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32
 //-----------------------------------------------------------------------------
 static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face");
 
-void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind)
+void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind, bool terse_update)
 {
 	mFace = face;
 
@@ -698,22 +698,6 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 			vertex_weightsp += mMesh->mFaceVertexOffset;
 			clothing_weightsp += mMesh->mFaceVertexOffset;
 
-			U32* __restrict v = (U32*) verticesp.get();
-			const U32 vert_skip = verticesp.getSkip()/sizeof(U32);
-
-			U32* __restrict tc = (U32*) tex_coordsp.get();
-			const U32 tc_skip = tex_coordsp.getSkip()/sizeof(U32);
-
-			U32* __restrict n = (U32*) normalsp.get();
-			const U32 n_skip = normalsp.getSkip()/sizeof(U32);
-			
-			U32* __restrict vw = (U32*) vertex_weightsp.get();
-			const U32 vw_skip = vertex_weightsp.getSkip()/sizeof(U32);
-
-
-			U32* __restrict cw = (U32*) clothing_weightsp.get();
-			const U32 cw_skip = vertex_weightsp.getSkip()/sizeof(U32);
-
 			const U32* __restrict coords = (U32*) mMesh->getCoords();
 			const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords();
 			const U32* __restrict normals = (U32*) mMesh->getNormals();
@@ -723,49 +707,84 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 			const U32 num_verts = mMesh->getNumVertices();
 
 			U32 i = 0;
-			do
+
+			const U32 skip = verticesp.getSkip()/sizeof(U32);
+
+			U32* __restrict v = (U32*) verticesp.get();
+			U32* __restrict n = (U32*) normalsp.get();
+			
+			if (terse_update)
 			{
-				v[0] = *(coords++); 
-				v[1] = *(coords++); 
-				v[2] = *(coords++);
-				v += vert_skip;
-
-				tc[0] = *(tex_coords++); 
-				tc[1] = *(tex_coords++);
-				tc += tc_skip;
-
-				n[0] = *(normals++); 
-				n[1] = *(normals++);
-				n[2] = *(normals++);
-				n += n_skip;
-
-				vw[0] = *(weights++);
-				vw += vw_skip;
-
-				cw[0] = *(cloth_weights++);
-				cw[1] = *(cloth_weights++);
-				cw[2] = *(cloth_weights++);
-				cw[3] = *(cloth_weights++);
-				cw += cw_skip;
+				for (S32 i = num_verts; i > 0; --i)
+				{
+					//morph target application only, only update positions and normals
+					v[0] = coords[0]; 
+					v[1] = coords[1]; 
+					v[2] = coords[2];		
+					coords += 3;
+					v += skip;
+				}
+
+				for (S32 i = num_verts; i > 0; --i)
+				{
+					n[0] = normals[0]; 
+					n[1] = normals[1];
+					n[2] = normals[2];
+					normals += 3;
+					n += skip;
+				}
 			}
-			while (++i < num_verts);
+			else
+				{
 
-			const U32 idx_count = mMesh->getNumFaces()*3;
+				U32* __restrict tc = (U32*) tex_coordsp.get();
+				U32* __restrict vw = (U32*) vertex_weightsp.get();
+				U32* __restrict cw = (U32*) clothing_weightsp.get();
+				
+				do
+				{
+					v[0] = *(coords++); 
+					v[1] = *(coords++); 
+					v[2] = *(coords++);
+					v += skip;
+
+					tc[0] = *(tex_coords++); 
+					tc[1] = *(tex_coords++);
+					tc += skip;
+
+					n[0] = *(normals++); 
+					n[1] = *(normals++);
+					n[2] = *(normals++);
+					n += skip;
+
+					vw[0] = *(weights++);
+					vw += skip;
+
+					cw[0] = *(cloth_weights++);
+					cw[1] = *(cloth_weights++);
+					cw[2] = *(cloth_weights++);
+					cw[3] = *(cloth_weights++);
+					cw += skip;
+				}
+				while (++i < num_verts);
 
-			indicesp += mMesh->mFaceIndexOffset;
+				const U32 idx_count = mMesh->getNumFaces()*3;
 
-			U16* __restrict idx = indicesp.get();
-			S32* __restrict src_idx = (S32*) mMesh->getFaces();
+				indicesp += mMesh->mFaceIndexOffset;
 
-			i = 0;
+				U16* __restrict idx = indicesp.get();
+				S32* __restrict src_idx = (S32*) mMesh->getFaces();
 
-			const S32 offset = (S32) mMesh->mFaceVertexOffset;
+				i = 0;
 
-			do
-			{
-				*(idx++) = *(src_idx++)+offset;
+				const S32 offset = (S32) mMesh->mFaceVertexOffset;
+
+				do
+				{
+					*(idx++) = *(src_idx++)+offset;
+				}
+				while (++i < idx_count);
 			}
-			while (++i < idx_count);
 		}
 	}
 }
-- 
cgit v1.2.3


From 49579bebdd274a88c2381c4cab3d09ecd393564d Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Mon, 10 May 2010 13:00:36 -0500
Subject: Fix for weird triangle shadow bug and fix for ATI hating deferred
 rendering.

---
 indra/newview/llviewerjointmesh.cpp | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index fb6cc8d790..deb3d8fd97 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -516,6 +516,8 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 
 	U32 triangle_count = 0;
 
+	S32 diffuse_channel = LLDrawPoolAvatar::sDiffuseChannel;
+
 	stop_glerror();
 	
 	//----------------------------------------------------------------
@@ -541,7 +543,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 	LLTexUnit::eTextureAddressMode old_mode = LLTexUnit::TAM_WRAP;
 	if (mTestImageName)
 	{
-		gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTestImageName);
+		gGL.getTexUnit(diffuse_channel)->bindManual(LLTexUnit::TT_TEXTURE, mTestImageName);
 
 		if (mIsTransparent)
 		{
@@ -550,18 +552,18 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 		else
 		{
 			glColor4f(0.7f, 0.6f, 0.3f, 1.f);
-			gGL.getTexUnit(0)->setTextureColorBlend(LLTexUnit::TBO_LERP_TEX_ALPHA, LLTexUnit::TBS_TEX_COLOR, LLTexUnit::TBS_PREV_COLOR);
+			gGL.getTexUnit(diffuse_channel)->setTextureColorBlend(LLTexUnit::TBO_LERP_TEX_ALPHA, LLTexUnit::TBS_TEX_COLOR, LLTexUnit::TBS_PREV_COLOR);
 		}
 	}
 	else if( !is_dummy && mLayerSet )
 	{
 		if(	mLayerSet->hasComposite() )
 		{
-			gGL.getTexUnit(0)->bind(mLayerSet->getComposite());
+			gGL.getTexUnit(diffuse_channel)->bind(mLayerSet->getComposite());
 		}
 		else
 		{
-			gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
+			gGL.getTexUnit(diffuse_channel)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
 		}
 	}
 	else
@@ -571,25 +573,25 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 		{
 			old_mode = mTexture->getAddressMode();
 		}
-		gGL.getTexUnit(0)->bind(mTexture.get());
-		gGL.getTexUnit(0)->bind(mTexture);
-		gGL.getTexUnit(0)->setTextureAddressMode(LLTexUnit::TAM_CLAMP);
+		gGL.getTexUnit(diffuse_channel)->bind(mTexture.get());
+		gGL.getTexUnit(diffuse_channel)->bind(mTexture);
+		gGL.getTexUnit(diffuse_channel)->setTextureAddressMode(LLTexUnit::TAM_CLAMP);
 	}
 	else
 	{
-		gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
+		gGL.getTexUnit(diffuse_channel)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
 	}
 	
 	if (gRenderForSelect)
 	{
 		if (isTransparent())
 		{
-			gGL.getTexUnit(0)->setTextureColorBlend(LLTexUnit::TBO_REPLACE, LLTexUnit::TBS_PREV_COLOR);
-			gGL.getTexUnit(0)->setTextureAlphaBlend(LLTexUnit::TBO_MULT, LLTexUnit::TBS_TEX_ALPHA, LLTexUnit::TBS_CONST_ALPHA);
+			gGL.getTexUnit(diffuse_channel)->setTextureColorBlend(LLTexUnit::TBO_REPLACE, LLTexUnit::TBS_PREV_COLOR);
+			gGL.getTexUnit(diffuse_channel)->setTextureAlphaBlend(LLTexUnit::TBO_MULT, LLTexUnit::TBS_TEX_ALPHA, LLTexUnit::TBS_CONST_ALPHA);
 		}
 		else
 		{
-			gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
+			gGL.getTexUnit(diffuse_channel)->unbind(LLTexUnit::TT_TEXTURE);
 		}
 	}
 	
@@ -626,13 +628,13 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 	
 	if (mTestImageName)
 	{
-		gGL.getTexUnit(0)->setTextureBlendType(LLTexUnit::TB_MULT);
+		gGL.getTexUnit(diffuse_channel)->setTextureBlendType(LLTexUnit::TB_MULT);
 	}
 
 	if (mTexture.notNull() && !is_dummy)
 	{
-		gGL.getTexUnit(0)->bind(mTexture);
-		gGL.getTexUnit(0)->setTextureAddressMode(old_mode);
+		gGL.getTexUnit(diffuse_channel)->bind(mTexture);
+		gGL.getTexUnit(diffuse_channel)->setTextureAddressMode(old_mode);
 	}
 
 	return triangle_count;
-- 
cgit v1.2.3


From f14215689244a65064158e475e4f41eb149d85b0 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Thu, 20 May 2010 02:22:26 -0500
Subject: Vectorized avatar vertex skinning.

---
 indra/newview/llviewerjointmesh.cpp | 145 +++++++++++++++++-------------------
 1 file changed, 70 insertions(+), 75 deletions(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index deb3d8fd97..294dfdcb55 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -61,6 +61,7 @@
 #include "v4math.h"
 #include "m3math.h"
 #include "m4math.h"
+#include "llmatrix4a.h"
 
 #if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS
 extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB;
@@ -382,6 +383,7 @@ const S32 NUM_AXES = 3;
 // pivot parent 0-n -- child = n+1
 
 static LLMatrix4	gJointMatUnaligned[32];
+static LLMatrix4a	gJointMatAligned[32];
 static LLMatrix3	gJointRotUnaligned[32];
 static LLVector4	gJointPivot[32];
 
@@ -467,6 +469,14 @@ void LLViewerJointMesh::uploadJointMatrices()
 		glUniform4fvARB(gAvatarMatrixParam, 45, mat);
 		stop_glerror();
 	}
+	else
+	{
+		//load gJointMatUnaligned into gJointMatAligned
+		for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); ++joint_num)
+		{
+			gJointMatAligned[joint_num].loadu(gJointMatUnaligned[joint_num]);
+		}
+	}
 }
 
 //--------------------------------------------------------------------
@@ -723,7 +733,7 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 					v[0] = coords[0]; 
 					v[1] = coords[1]; 
 					v[2] = coords[2];		
-					coords += 3;
+					coords += 4;
 					v += skip;
 				}
 
@@ -732,12 +742,12 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 					n[0] = normals[0]; 
 					n[1] = normals[1];
 					n[2] = normals[2];
-					normals += 3;
+					normals += 4;
 					n += skip;
 				}
 			}
 			else
-				{
+			{
 
 				U32* __restrict tc = (U32*) tex_coordsp.get();
 				U32* __restrict vw = (U32*) vertex_weightsp.get();
@@ -745,18 +755,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 				
 				do
 				{
-					v[0] = *(coords++); 
-					v[1] = *(coords++); 
-					v[2] = *(coords++);
+					v[0] = coords[0]; 
+					v[1] = coords[1]; 
+					v[2] = coords[2];		
+					coords += 4;
 					v += skip;
 
 					tc[0] = *(tex_coords++); 
 					tc[1] = *(tex_coords++);
 					tc += skip;
 
-					n[0] = *(normals++); 
-					n[1] = *(normals++);
-					n[2] = *(normals++);
+					n[0] = normals[0]; 
+					n[1] = normals[1];
+					n[2] = normals[2];
+					normals += 4;
 					n += skip;
 
 					vw[0] = *(weights++);
@@ -808,17 +820,17 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
 	LLStrider<LLVector3> o_normals;
 
 	//get vertex and normal striders
-	LLVertexBuffer *buffer = mFace->mVertexBuffer;
+	LLVertexBuffer* buffer = mFace->mVertexBuffer;
 	buffer->getVertexStrider(o_vertices,  0);
 	buffer->getNormalStrider(o_normals,   0);
 
-	F32 last_weight = F32_MAX;
-	LLMatrix4 gBlendMat;
-	LLMatrix3 gBlendRotMat;
+	//F32 last_weight = F32_MAX;
+	LLMatrix4a gBlendMat;
+
+	__restrict const F32* weights = mMesh->getWeights();
+	__restrict const LLVector4* coords = mMesh->getCoords();
+	__restrict const LLVector4* normals = mMesh->getNormals();
 
-	const F32* weights = mMesh->getWeights();
-	const LLVector3* coords = mMesh->getCoords();
-	const LLVector3* normals = mMesh->getNormals();
 	for (U32 index = 0; index < mMesh->getNumVertices(); index++)
 	{
 		U32 bidx = index + mMesh->mFaceVertexOffset;
@@ -826,71 +838,54 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
 		// blend by first matrix
 		F32 w = weights[index]; 
 		
+		LLVector4a coord;
+		coord.load4a(coords[index].mV);
+
+		LLVector4a norm;
+		norm.load4a(normals[index].mV);
+
 		// Maybe we don't have to change gBlendMat.
 		// Profiles of a single-avatar scene on a Mac show this to be a very
 		// common case.  JC
-		if (w == last_weight)
+		//if (w != last_weight)
 		{
-			o_vertices[bidx] = coords[index] * gBlendMat;
-			o_normals[bidx] = normals[index] * gBlendRotMat;
-			continue;
-		}
-		
-		last_weight = w;
+			//last_weight = w;
 
-		S32 joint = llfloor(w);
-		w -= joint;
-		
-		// No lerp required in this case.
-		if (w == 1.0f)
-		{
-			gBlendMat = gJointMatUnaligned[joint+1];
-			o_vertices[bidx] = coords[index] * gBlendMat;
-			gBlendRotMat = gJointRotUnaligned[joint+1];
-			o_normals[bidx] = normals[index] * gBlendRotMat;
-			continue;
+			S32 joint = llfloor(w);
+			w -= joint;
+				
+			
+			if (w >= 0.f)
+			{
+				// Try to keep all the accesses to the matrix data as close
+				// together as possible.  This function is a hot spot on the
+				// Mac. JC
+				gBlendMat.setLerp(gJointMatAligned[joint+0],
+								  gJointMatAligned[joint+1], w);
+
+				LLVector4a res;
+				gBlendMat.affineTransform(coord, res);
+				o_vertices[bidx].setVec(res[0], res[1], res[2]);
+				gBlendMat.rotate(norm, res);
+				o_normals[bidx].setVec(res[0], res[1], res[2]);
+			}
+			else
+			{  // No lerp required in this case.
+				LLVector4a res;
+				gJointMatAligned[joint].affineTransform(coord, res);
+				o_vertices[bidx].setVec(res[0], res[1], res[2]);
+				gJointMatAligned[joint].rotate(norm, res);
+				o_normals[bidx].setVec(res[0], res[1], res[2]);
+			}
 		}
-		
-		// Try to keep all the accesses to the matrix data as close
-		// together as possible.  This function is a hot spot on the
-		// Mac. JC
-		LLMatrix4 &m0 = gJointMatUnaligned[joint+1];
-		LLMatrix4 &m1 = gJointMatUnaligned[joint+0];
-		
-		gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
-		gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
-		gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w);
-
-		gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w);
-		gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w);
-		gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w);
-
-		gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w);
-		gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w);
-		gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w);
-
-		gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w);
-		gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w);
-		gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w);
-
-		o_vertices[bidx] = coords[index] * gBlendMat;
-		
-		LLMatrix3 &n0 = gJointRotUnaligned[joint+1];
-		LLMatrix3 &n1 = gJointRotUnaligned[joint+0];
-		
-		gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
-		gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
-		gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w);
-
-		gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w);
-		gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w);
-		gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w);
-
-		gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w);
-		gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w);
-		gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w);
-		
-		o_normals[bidx] = normals[index] * gBlendRotMat;
+		/*else
+		{ //weight didn't change
+			LLVector4a res;
+			gBlendMat.affineTransform(coord, res);
+			o_vertices[bidx].setVec(res[0], res[1], res[2]);
+			gBlendMat.rotate(norm, res);
+			o_normals[bidx].setVec(res[0], res[1], res[2]);
+		}*/
 	}
 
 	buffer->setBuffer(0);
-- 
cgit v1.2.3


From bf5f215fbc29102cfd8b5418f29ea0ed6edd14ee Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Thu, 20 May 2010 02:46:01 -0500
Subject: Cleanup from review.

---
 indra/newview/llviewerjointmesh.cpp | 69 ++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 43 deletions(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 294dfdcb55..236ad98d68 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -827,9 +827,9 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
 	//F32 last_weight = F32_MAX;
 	LLMatrix4a gBlendMat;
 
-	__restrict const F32* weights = mMesh->getWeights();
-	__restrict const LLVector4* coords = mMesh->getCoords();
-	__restrict const LLVector4* normals = mMesh->getNormals();
+	const F32* __restrict weights = mMesh->getWeights();
+	const LLVector4a* __restrict coords = (LLVector4a*) mMesh->getCoords();
+	const LLVector4a* __restrict normals = (LLVector4a*) mMesh->getNormals();
 
 	for (U32 index = 0; index < mMesh->getNumVertices(); index++)
 	{
@@ -838,54 +838,37 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
 		// blend by first matrix
 		F32 w = weights[index]; 
 		
-		LLVector4a coord;
-		coord.load4a(coords[index].mV);
+		//LLVector4a coord;
+		//coord.load4a(coords[index].mV);
 
-		LLVector4a norm;
-		norm.load4a(normals[index].mV);
+		//LLVector4a norm;
+		//norm.load4a(normals[index].mV);
 
-		// Maybe we don't have to change gBlendMat.
-		// Profiles of a single-avatar scene on a Mac show this to be a very
-		// common case.  JC
-		//if (w != last_weight)
+		S32 joint = llfloor(w);
+		w -= joint;
+				
+		if (w > 0.f)
 		{
-			//last_weight = w;
+			// Try to keep all the accesses to the matrix data as close
+			// together as possible.  This function is a hot spot on the
+			// Mac. JC
+			gBlendMat.setLerp(gJointMatAligned[joint+0],
+							  gJointMatAligned[joint+1], w);
 
-			S32 joint = llfloor(w);
-			w -= joint;
-				
-			
-			if (w >= 0.f)
-			{
-				// Try to keep all the accesses to the matrix data as close
-				// together as possible.  This function is a hot spot on the
-				// Mac. JC
-				gBlendMat.setLerp(gJointMatAligned[joint+0],
-								  gJointMatAligned[joint+1], w);
-
-				LLVector4a res;
-				gBlendMat.affineTransform(coord, res);
-				o_vertices[bidx].setVec(res[0], res[1], res[2]);
-				gBlendMat.rotate(norm, res);
-				o_normals[bidx].setVec(res[0], res[1], res[2]);
-			}
-			else
-			{  // No lerp required in this case.
-				LLVector4a res;
-				gJointMatAligned[joint].affineTransform(coord, res);
-				o_vertices[bidx].setVec(res[0], res[1], res[2]);
-				gJointMatAligned[joint].rotate(norm, res);
-				o_normals[bidx].setVec(res[0], res[1], res[2]);
-			}
+			LLVector4a res;
+			gBlendMat.affineTransform(coords[index], res);
+			o_vertices[bidx].setVec(res[0], res[1], res[2]);
+			gBlendMat.rotate(normals[index], res);
+			o_normals[bidx].setVec(res[0], res[1], res[2]);
 		}
-		/*else
-		{ //weight didn't change
+		else
+		{  // No lerp required in this case.
 			LLVector4a res;
-			gBlendMat.affineTransform(coord, res);
+			gJointMatAligned[joint].affineTransform(coords[index], res);
 			o_vertices[bidx].setVec(res[0], res[1], res[2]);
-			gBlendMat.rotate(norm, res);
+			gJointMatAligned[joint].rotate(normals[index], res);
 			o_normals[bidx].setVec(res[0], res[1], res[2]);
-		}*/
+		}
 	}
 
 	buffer->setBuffer(0);
-- 
cgit v1.2.3


From 05a23f8dbaa45c64bcf6c55dd09a468ba2b1f144 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Fri, 21 May 2010 04:49:12 -0500
Subject: Vectorized memcpy. 16-byte aligned vertex buffers. (almost) fully
 vectorized avatar vertex buffer updating --- index buffers still need to be
 vectorized

---
 indra/newview/llviewerjointmesh.cpp | 169 +++++++++++++-----------------------
 1 file changed, 61 insertions(+), 108 deletions(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 236ad98d68..a7e7bfadd6 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -655,6 +655,9 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 //-----------------------------------------------------------------------------
 void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area)
 {
+	//bump num_vertices to next multiple of 4
+	num_vertices = (num_vertices + 0x3) & ~0x3;
+
 	// Do a pre-alloc pass to determine sizes of data.
 	if (mMesh && mValid)
 	{
@@ -677,6 +680,8 @@ static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face");
 
 void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind, bool terse_update)
 {
+	//IF THIS FUNCTION BREAKS, SEE LLPOLYMESH CONSTRUCTOR AND CHECK ALIGNMENT OF INPUT ARRAYS
+
 	mFace = face;
 
 	if (mFace->mVertexBuffer.isNull())
@@ -684,6 +689,16 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 		return;
 	}
 
+	LLDrawPool *poolp = mFace->getPool();
+	BOOL hardware_skinning = (poolp && poolp->getVertexShaderLevel() > 0) ? TRUE : FALSE;
+
+	if (!hardware_skinning && terse_update)
+	{ //no need to do terse updates if we're doing software vertex skinning
+	 // since mMesh is being copied into mVertexBuffer every frame
+		return;
+	}
+
+
 	LLFastTimer t(FTM_AVATAR_FACE);
 
 	LLStrider<LLVector3> verticesp;
@@ -696,108 +711,52 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 	// Copy data into the faces from the polymesh data.
 	if (mMesh && mValid)
 	{
-		if (mMesh->getNumVertices())
+		const U32 num_verts = mMesh->getNumVertices();
+
+		if (num_verts)
 		{
-			stop_glerror();
 			face->getGeometryAvatar(verticesp, normalsp, tex_coordsp, vertex_weightsp, clothing_weightsp);
-			stop_glerror();
 			face->mVertexBuffer->getIndexStrider(indicesp);
-			stop_glerror();
 
 			verticesp += mMesh->mFaceVertexOffset;
-			tex_coordsp += mMesh->mFaceVertexOffset;
 			normalsp += mMesh->mFaceVertexOffset;
-			vertex_weightsp += mMesh->mFaceVertexOffset;
-			clothing_weightsp += mMesh->mFaceVertexOffset;
-
-			const U32* __restrict coords = (U32*) mMesh->getCoords();
-			const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords();
-			const U32* __restrict normals = (U32*) mMesh->getNormals();
-			const U32* __restrict weights = (U32*) mMesh->getWeights();
-			const U32* __restrict cloth_weights = (U32*) mMesh->getClothingWeights();
-
-			const U32 num_verts = mMesh->getNumVertices();
-
-			U32 i = 0;
-
-			const U32 skip = verticesp.getSkip()/sizeof(U32);
+			
+			F32* v = (F32*) verticesp.get();
+			F32* n = (F32*) normalsp.get();
+			
+			U32 words = num_verts*4;
 
-			U32* __restrict v = (U32*) verticesp.get();
-			U32* __restrict n = (U32*) normalsp.get();
+			LLVector4a::memcpyNonAliased16(v, (F32*) mMesh->getCoords(), words);
+			LLVector4a::memcpyNonAliased16(n, (F32*) mMesh->getNormals(), words);
+						
 			
-			if (terse_update)
+			if (!terse_update)
 			{
-				for (S32 i = num_verts; i > 0; --i)
-				{
-					//morph target application only, only update positions and normals
-					v[0] = coords[0]; 
-					v[1] = coords[1]; 
-					v[2] = coords[2];		
-					coords += 4;
-					v += skip;
-				}
+				vertex_weightsp += mMesh->mFaceVertexOffset;
+				clothing_weightsp += mMesh->mFaceVertexOffset;
+				tex_coordsp += mMesh->mFaceVertexOffset;
+		
+				F32* tc = (F32*) tex_coordsp.get();
+				F32* vw = (F32*) vertex_weightsp.get();
+				F32* cw = (F32*) clothing_weightsp.get();	
 
-				for (S32 i = num_verts; i > 0; --i)
-				{
-					n[0] = normals[0]; 
-					n[1] = normals[1];
-					n[2] = normals[2];
-					normals += 4;
-					n += skip;
-				}
+				LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2);
+				LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts);	
+				LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4);	
 			}
-			else
-			{
-
-				U32* __restrict tc = (U32*) tex_coordsp.get();
-				U32* __restrict vw = (U32*) vertex_weightsp.get();
-				U32* __restrict cw = (U32*) clothing_weightsp.get();
-				
-				do
-				{
-					v[0] = coords[0]; 
-					v[1] = coords[1]; 
-					v[2] = coords[2];		
-					coords += 4;
-					v += skip;
-
-					tc[0] = *(tex_coords++); 
-					tc[1] = *(tex_coords++);
-					tc += skip;
-
-					n[0] = normals[0]; 
-					n[1] = normals[1];
-					n[2] = normals[2];
-					normals += 4;
-					n += skip;
-
-					vw[0] = *(weights++);
-					vw += skip;
-
-					cw[0] = *(cloth_weights++);
-					cw[1] = *(cloth_weights++);
-					cw[2] = *(cloth_weights++);
-					cw[3] = *(cloth_weights++);
-					cw += skip;
-				}
-				while (++i < num_verts);
-
-				const U32 idx_count = mMesh->getNumFaces()*3;
 
-				indicesp += mMesh->mFaceIndexOffset;
+			const U32 idx_count = mMesh->getNumFaces()*3;
 
-				U16* __restrict idx = indicesp.get();
-				S32* __restrict src_idx = (S32*) mMesh->getFaces();
+			indicesp += mMesh->mFaceIndexOffset;
 
-				i = 0;
+			U16* __restrict idx = indicesp.get();
+			S32* __restrict src_idx = (S32*) mMesh->getFaces();	
 
-				const S32 offset = (S32) mMesh->mFaceVertexOffset;
+			const S32 offset = (S32) mMesh->mFaceVertexOffset;
 
-				do
-				{
-					*(idx++) = *(src_idx++)+offset;
-				}
-				while (++i < idx_count);
+			for (S32 i = 0; i < idx_count; ++i)
+			{
+				*(idx++) = *(src_idx++)+offset;
 			}
 		}
 	}
@@ -824,50 +783,44 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
 	buffer->getVertexStrider(o_vertices,  0);
 	buffer->getNormalStrider(o_normals,   0);
 
-	//F32 last_weight = F32_MAX;
-	LLMatrix4a gBlendMat;
+	F32* __restrict vert = o_vertices[0].mV;
+	F32* __restrict norm = o_normals[0].mV;
 
 	const F32* __restrict weights = mMesh->getWeights();
 	const LLVector4a* __restrict coords = (LLVector4a*) mMesh->getCoords();
 	const LLVector4a* __restrict normals = (LLVector4a*) mMesh->getNormals();
 
+	U32 offset = mMesh->mFaceVertexOffset*4;
+	vert += offset;
+	norm += offset;
+
 	for (U32 index = 0; index < mMesh->getNumVertices(); index++)
 	{
-		U32 bidx = index + mMesh->mFaceVertexOffset;
-		
-		// blend by first matrix
-		F32 w = weights[index]; 
-		
-		//LLVector4a coord;
-		//coord.load4a(coords[index].mV);
+		// equivalent to joint = floorf(weights[index]);
+		S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index));
+		F32 w = weights[index] - joint;		
 
-		//LLVector4a norm;
-		//norm.load4a(normals[index].mV);
+		LLMatrix4a gBlendMat;
 
-		S32 joint = llfloor(w);
-		w -= joint;
-				
-		if (w > 0.f)
+		if (w != 0.f)
 		{
-			// Try to keep all the accesses to the matrix data as close
-			// together as possible.  This function is a hot spot on the
-			// Mac. JC
+			// blend between matrices and apply
 			gBlendMat.setLerp(gJointMatAligned[joint+0],
 							  gJointMatAligned[joint+1], w);
 
 			LLVector4a res;
 			gBlendMat.affineTransform(coords[index], res);
-			o_vertices[bidx].setVec(res[0], res[1], res[2]);
+			res.store4a(vert+index*4);
 			gBlendMat.rotate(normals[index], res);
-			o_normals[bidx].setVec(res[0], res[1], res[2]);
+			res.store4a(norm+index*4);
 		}
 		else
 		{  // No lerp required in this case.
 			LLVector4a res;
 			gJointMatAligned[joint].affineTransform(coords[index], res);
-			o_vertices[bidx].setVec(res[0], res[1], res[2]);
+			res.store4a(vert+index*4);
 			gJointMatAligned[joint].rotate(normals[index], res);
-			o_normals[bidx].setVec(res[0], res[1], res[2]);
+			res.store4a(norm+index*4);
 		}
 	}
 
-- 
cgit v1.2.3


From 1ad56f84ef1102803986889cdd5b2a687adb557e Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Fri, 21 May 2010 14:33:05 -0500
Subject: Line endings fix.

---
 indra/newview/llviewerjointmesh.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index a7e7bfadd6..91605005e3 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -797,7 +797,7 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
 	for (U32 index = 0; index < mMesh->getNumVertices(); index++)
 	{
 		// equivalent to joint = floorf(weights[index]);
-		S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index));
+		S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index));
 		F32 w = weights[index] - joint;		
 
 		LLMatrix4a gBlendMat;
-- 
cgit v1.2.3