| author | Steven Bennetts <steve@lindenlab.com> | 2007-03-02 21:25:50 +0000 |
|---|---|---|
| committer | Steven Bennetts <steve@lindenlab.com> | 2007-03-02 21:25:50 +0000 |
| commit | 4dabd9c0472deb49573fdafef2fa413e59703f19 (patch) | |
| tree | 06c680d6a2047e03838d6548bccd26c7baf9d652 /indra/newview/llviewerjointmesh.cpp | |
| parent | d4462963c6ba5db2088723bbedc7b60f1184c594 (diff) | |
merge release@58699 beta-1-14-0@58707 -> release
Diffstat (limited to 'indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- | indra/newview/llviewerjointmesh.cpp | 824
1 file changed, 149 insertions, 675 deletions
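
The skinning math in this diff — both in the removed SSE/AltiVec blend paths and in the new updateGeometry() — relies on a packed per-vertex weight: the integer part selects a joint in the gJointMat palette, the fractional part blends toward the next joint, and the blend is applied once to the matrices rather than twice to each vertex and normal. Below is a minimal standalone sketch of that convention; the plain float[4][4] matrices and helper names are illustrative, not the viewer's LLMatrix4/gJointMat API.

```cpp
#include <cmath>
#include <cstdio>

typedef float Mat4[4][4];

// out = a + (b - a) * w, element by element -- the same lerp the diff spells
// out on every element of gBlendMat / gBlendRotMat.
void lerp_matrix(Mat4 a, Mat4 b, float w, Mat4 out)
{
    for (int r = 0; r < 4; ++r)
        for (int c = 0; c < 4; ++c)
            out[r][c] = a[r][c] + (b[r][c] - a[r][c]) * w;
}

// 'palette' stands in for gJointMat; 'packed_weight' is one entry of getWeights().
void blend_joint_matrix(Mat4* palette, float packed_weight, Mat4 out)
{
    int   joint = (int)std::floor(packed_weight); // integer part: lower joint index
    float w     = packed_weight - (float)joint;   // fractional part: blend factor

    // Equivalent to lerping gJointMat[joint] toward gJointMat[joint + 1] by w,
    // i.e. blending matrices instead of blending two transformed vertices.
    lerp_matrix(palette[joint], palette[joint + 1], w, out);
}

int main()
{
    Mat4 palette[2] = {}; // two dummy joints: identity, and identity translated in X
    for (int i = 0; i < 4; ++i) { palette[0][i][i] = 1.f; palette[1][i][i] = 1.f; }
    palette[1][3][0] = 2.f; // row-major translation row, as in LLMatrix4

    Mat4 blended;
    blend_joint_matrix(palette, 0.25f, blended);  // joint 0, 25% of the way to joint 1
    std::printf("blended x-translation = %f\n", blended[3][0]); // prints 0.500000
    return 0;
}
```

The per-vertex loops in the diff additionally cache the most recently blended matrix (the last_weight checks), so runs of vertices that share a packed weight skip the lerp entirely.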
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index aec15a8d6c..512ddc8565 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -19,7 +19,6 @@ #include "llfasttimer.h" #include "llagent.h" -#include "llagparray.h" #include "llbox.h" #include "lldrawable.h" #include "lldrawpoolavatar.h" @@ -43,6 +42,10 @@ extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB; #endif extern BOOL gRenderForSelect; +static LLPointer<LLVertexBuffer> sRenderBuffer = NULL; +static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX | + LLVertexBuffer::MAP_NORMAL | + LLVertexBuffer::MAP_TEXCOORD; LLMatrix4 gBlendMat; //----------------------------------------------------------------------------- @@ -375,11 +378,11 @@ void LLViewerJointMesh::setupJoint(LLViewerJoint* current_joint) } // depth-first traversal - for (LLJoint *child_joint = current_joint->mChildren.getFirstData(); - child_joint; - child_joint = current_joint->mChildren.getNextData()) + for (LLJoint::child_list_t::iterator iter = current_joint->mChildren.begin(); + iter != current_joint->mChildren.end(); ++iter) { - setupJoint((LLViewerJoint*)child_joint); + LLViewerJoint* child_joint = (LLViewerJoint*)(*iter); + setupJoint(child_joint); } } @@ -412,7 +415,7 @@ void LLViewerJointMesh::uploadJointMatrices() if (hardware_skinning) { - joint_mat *= gCamera->getModelview(); + joint_mat *= LLDrawPoolAvatar::getModelView(); } gJointMat[joint_num] = joint_mat; gJointRot[joint_num] = joint_mat.getMat3(); @@ -513,620 +516,39 @@ int compare_int(const void *a, const void *b) else return 0; } -#if LL_WINDOWS || (LL_DARWIN && __i386__) // SSE optimizations in avatar code - -#if LL_DARWIN -#include <xmmintrin.h> - - // On Windows, this class is defined in fvec.h. I've only reproduced the parts of it we use here for now. 
- #pragma pack(push,16) /* Must ensure class & union 16-B aligned */ - class F32vec4 - { - protected: - __m128 vec; - public: - - /* Constructors: __m128, 4 floats, 1 float */ - F32vec4() {} - - /* initialize 4 SP FP with __m128 data type */ - F32vec4(__m128 m) { vec = m;} - - /* Explicitly initialize each of 4 SP FPs with same float */ - explicit F32vec4(float f) { vec = _mm_set_ps1(f); } - }; - #pragma pack(pop) /* 16-B aligned */ - - -#endif - -void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, - LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) +void llDrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) { - F32 last_weight = F32_MAX; - LLMatrix4 *blend_mat = &gBlendMat; - - for (S32 index = vert_offset; index < vert_offset + vert_count; index++) + if (end-start+1 > (U32) gGLManager.mGLMaxVertexRange || + count > gGLManager.mGLMaxIndexRange) { - F32 w = weights [index]; // register copy of weight - F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever - F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned - - if (w == last_weight) - { - // load input and output vertices, and last blended matrix - __asm { - mov esi, vin - mov edi, vout - - mov edx, blend_mat - movaps xmm4, [edx] - movaps xmm5, [edx+0x10] - movaps xmm6, [edx+0x20] - movaps xmm7, [edx+0x30] - } - } - else - { - last_weight = w; - S32 joint = llfloor(w); - w -= joint; - - LLMatrix4 *m0 = &(gJointMat[joint+1]); - LLMatrix4 *m1 = &(gJointMat[joint+0]); - - // some initial code to load Matrix 0 into SSE registers - __asm { - mov esi, vin - mov edi, vout - - //matrix2 - mov edx, m0 - movaps xmm4, [edx] - movaps xmm5, [edx+0x10] - movaps xmm6, [edx+0x20] - movaps xmm7, [edx+0x30] - }; - - // if w == 1.0f, we don't need to blend. - // but since we do the trick of blending the matrices, here, if w != 1.0, - // we load Matrix 1 into the other 4 SSE registers and blend both matrices - // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w) - - if (w != 1.0f) - { - F32vec4 weight(w); - - __asm { // do blending of matrices instead of verts and normals -- faster - mov edx, m1 - movaps xmm0, [edx] - movaps xmm1, [edx+0x10] - movaps xmm2, [edx+0x20] - movaps xmm3, [edx+0x30] - - subps xmm4, xmm0 // do blend for each matrix column - subps xmm5, xmm1 // diff, then multiply weight and re-add - subps xmm6, xmm2 - subps xmm7, xmm3 - - mulps xmm4, weight - mulps xmm5, weight - mulps xmm6, weight - mulps xmm7, weight - - addps xmm4, xmm0 - addps xmm5, xmm1 - addps xmm6, xmm2 - addps xmm7, xmm3 - }; - } - - __asm { - // save off blended matrix - mov edx, blend_mat; - movaps [edx], xmm4; - movaps [edx+0x10], xmm5; - movaps [edx+0x20], xmm6; - movaps [edx+0x30], xmm7; - } - } - - // now, we have either a blended matrix in xmm4-7 or the original Matrix 0 - // we then multiply each vertex and normal by this one matrix. - - // For SSE2, we would try to keep the original two matrices in other registers - // and avoid reloading them. However, they should ramain in L1 cache in the - // current case. - - // One possible optimization would be to sort the vertices by weight instead - // of just index (we still want to uniqify). 
If we note when two or more vertices - // share the same weight, we can avoid doing the middle SSE code above and just - // re-use the blended matrix for those vertices - - - // now, we do the actual vertex blending - __asm { - // load Vertex into xmm0. - movaps xmm0, [esi] // change aps to ups when input is no longer 16-baligned - movaps xmm1, xmm0 // copy vector into xmm0 through xmm2 (x,y,z) - movaps xmm2, xmm0 - shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // clone vertex (x) across vector - shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1); // clone vertex (y) across vector - shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2); // same for Z - mulps xmm0, xmm4 // do the actual matrix multipication for r0 - mulps xmm1, xmm5 // for r1 - mulps xmm2, xmm6 // for r2 - addps xmm0, xmm1 // accumulate - addps xmm0, xmm2 // accumulate - addps xmm0, xmm7 // add in the row 4 which holds the x,y,z translation. assumes w=1 (vertex-w, not weight) - - movaps [edi], xmm0 // store aligned in output array - - // load Normal into xmm0. - movaps xmm0, [esi + 0x10] // change aps to ups when input no longer 16-byte aligned - movaps xmm1, xmm0 // - movaps xmm2, xmm0 - shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // since UV sits between vertex and normal, normal starts at element 1, not 0 - shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1); - shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2); - mulps xmm0, xmm4 // multiply by matrix - mulps xmm1, xmm5 // multiply - mulps xmm2, xmm6 // multiply - addps xmm0, xmm1 // accumulate - addps xmm0, xmm2 // accumulate. note: do not add translation component to normals, save time too - movaps [edi + 0x10], xmm0 // store aligned - } - - *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot. - } -} - -#elif LL_LINUX - -void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, - LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) -{ - assert(0); -} - -#elif LL_DARWIN -// AltiVec versions of the same... 
- -static inline vector float loadAlign(int offset, vector float *addr) -{ - vector float in0 = vec_ld(offset, addr); - vector float in1 = vec_ld(offset + 16, addr); - vector unsigned char perm = vec_lvsl(0, (unsigned char*)addr); - - return(vec_perm(in0, in1, perm)); -} - -static inline void storeAlign(vector float v, int offset, vector float *addr) -{ - vector float in0 = vec_ld(offset, addr); - vector float in1 = vec_ld(offset + 16, addr); - vector unsigned char perm = vec_lvsr(0, (unsigned char *)addr); - vector float temp = vec_perm(v, v, perm); - vector unsigned char mask = (vector unsigned char)vec_cmpgt(perm, vec_splat_u8(15)); - - in0 = vec_sel(in0, temp, (vector unsigned int)mask); - in1 = vec_sel(temp, in1, (vector unsigned int)mask); - - vec_st(in0, offset, addr); - vec_st(in1, offset + 16, addr); -} - -void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, - LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) -{ - F32 last_weight = F32_MAX; -// LLMatrix4 &blend_mat = gBlendMat; - - vector float matrix0_0, matrix0_1, matrix0_2, matrix0_3; - vector unsigned char out0perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F ); -// vector unsigned char out1perm = (vector unsigned char) ( 0x00,0x01,0x02,0x03, 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B ); - vector unsigned char out1perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F ); - - vector float zero = (vector float)vec_splat_u32(0); - - for (U32 index = vert_offset; index < vert_offset + vert_count; index++) - { - F32 w = weights [index]; // register copy of weight - F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever - F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned - - // MBW -- XXX -- If this isn't the case, this code gets more complicated. - if(0x0000000F & (U32)vin) - { - llerrs << "blend_SSE_batch: input not 16-byte aligned!" << llendl; - } - if(0x0000000F & (U32)vout) - { - llerrs << "blend_SSE_batch: output not 16-byte aligned!" << llendl; - } -// if(0x0000000F & (U32)&(blend_mat.mMatrix)) -// { -// llerrs << "blend_SSE_batch: blend_mat not 16-byte aligned!" << llendl; -// } - - if (w == last_weight) - { - // load last blended matrix - // Still loaded from last time through the loop. -// matrix0_0 = vec_ld(0x00, (vector float*)&(blend_mat.mMatrix)); -// matrix0_1 = vec_ld(0x10, (vector float*)&(blend_mat.mMatrix)); -// matrix0_2 = vec_ld(0x20, (vector float*)&(blend_mat.mMatrix)); -// matrix0_3 = vec_ld(0x30, (vector float*)&(blend_mat.mMatrix)); - } - else - { - last_weight = w; - S32 joint = llfloor(w); - w -= joint; - - LLMatrix4 &m0 = gJointMat[joint+1]; - LLMatrix4 &m1 = gJointMat[joint+0]; - - // load Matrix 0 into vector registers - matrix0_0 = vec_ld(0x00, (vector float*)&(m0.mMatrix)); - matrix0_1 = vec_ld(0x10, (vector float*)&(m0.mMatrix)); - matrix0_2 = vec_ld(0x20, (vector float*)&(m0.mMatrix)); - matrix0_3 = vec_ld(0x30, (vector float*)&(m0.mMatrix)); - - // if w == 1.0f, we don't need to blend. 
- // but since we do the trick of blending the matrices, here, if w != 1.0, - // we load Matrix 1 into the other 4 SSE registers and blend both matrices - // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w) - - if (w != 1.0f) - { - vector float matrix1_0, matrix1_1, matrix1_2, matrix1_3; - - // This loads the weight somewhere in the vector register - vector float weight = vec_lde(0, &(w)); - // and this splats it to all elements. - weight = vec_splat(vec_perm(weight, weight, vec_lvsl(0, &(w))), 0); - - // do blending of matrices instead of verts and normals -- faster - matrix1_0 = vec_ld(0x00, (vector float*)&(m1.mMatrix)); - matrix1_1 = vec_ld(0x10, (vector float*)&(m1.mMatrix)); - matrix1_2 = vec_ld(0x20, (vector float*)&(m1.mMatrix)); - matrix1_3 = vec_ld(0x30, (vector float*)&(m1.mMatrix)); - - // m0[col] = ((m0[col] - m1[col]) * weight) + m1[col]; - matrix0_0 = vec_madd(vec_sub(matrix0_0, matrix1_0), weight, matrix1_0); - matrix0_1 = vec_madd(vec_sub(matrix0_1, matrix1_1), weight, matrix1_1); - matrix0_2 = vec_madd(vec_sub(matrix0_2, matrix1_2), weight, matrix1_2); - matrix0_3 = vec_madd(vec_sub(matrix0_3, matrix1_3), weight, matrix1_3); - } - - // save off blended matrix -// vec_st(matrix0_0, 0x00, (vector float*)&(blend_mat.mMatrix)); -// vec_st(matrix0_1, 0x10, (vector float*)&(blend_mat.mMatrix)); -// vec_st(matrix0_2, 0x20, (vector float*)&(blend_mat.mMatrix)); -// vec_st(matrix0_3, 0x30, (vector float*)&(blend_mat.mMatrix)); - } - - // now, we have either a blended matrix in matrix0_0-3 or the original Matrix 0 - // we then multiply each vertex and normal by this one matrix. - - // For SSE2, we would try to keep the original two matrices in other registers - // and avoid reloading them. However, they should ramain in L1 cache in the - // current case. - - // One possible optimization would be to sort the vertices by weight instead - // of just index (we still want to uniqify). If we note when two or more vertices - // share the same weight, we can avoid doing the middle SSE code above and just - // re-use the blended matrix for those vertices - - - // now, we do the actual vertex blending - - vector float in0 = vec_ld(AVATAR_OFFSET_POS, (vector float*)vin); - vector float in1 = vec_ld(AVATAR_OFFSET_NORMAL, (vector float*)vin); - - // Matrix multiply vertex - vector float out0 = vec_madd - ( - vec_splat(in0, 0), - matrix0_0, - vec_madd - ( - vec_splat(in0, 1), - matrix0_1, - vec_madd - ( - vec_splat(in0, 2), - matrix0_2, - matrix0_3 - ) - ) - ); - - // Matrix multiply normal - vector float out1 = vec_madd - ( - vec_splat(in1, 0), - matrix0_0, - vec_madd - ( - vec_splat(in1, 1), - matrix0_1, - vec_madd - ( - vec_splat(in1, 2), - matrix0_2, - // no translation for normals - (vector float)vec_splat_u32(0) - ) - ) - ); - - // indexed store - vec_stl(vec_perm(in0, out0, out0perm), AVATAR_OFFSET_POS, (vector float*)vout); // Pos - vec_stl(vec_perm(in1, out1, out1perm), AVATAR_OFFSET_NORMAL, (vector float*)vout); // Norm - *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot. 
- } -} - -#endif - - -void llDrawElementsBatchBlend(const U32 vert_offset, const U32 vert_count, LLFace *face, const S32 index_count, const U32 *indices) -{ - U8* gAGPVertices = gPipeline.bufferGetScratchMemory(); - - if (gAGPVertices) - { - LLStrider<LLVector3> vertices; - LLStrider<LLVector3> normals; - LLStrider<LLVector2> tcoords0; - LLStrider<F32> weights; - - LLStrider<LLVector3> o_vertices; - LLStrider<LLVector3> o_normals; - LLStrider<LLVector2> o_texcoords0; - - - LLStrider<LLVector3> binormals; - LLStrider<LLVector2> o_texcoords1; - // get the source vertices from the draw pool. We index these ourselves, as there was - // no guarantee the indices for a single jointmesh were contigious - - LLDrawPool *pool = face->getPool(); - pool->getVertexStrider (vertices, 0); - pool->getTexCoordStrider (tcoords0, 0, 0); - pool->getNormalStrider (normals, 0); - pool->getBinormalStrider (binormals, 0); - pool->getVertexWeightStrider(weights, 0); - - // load the addresses of the output striders - o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES); - o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES); - o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES); - o_texcoords1= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX1); o_texcoords1.setStride(AVATAR_VERTEX_BYTES); - -#if !LL_LINUX // !!! *TODO: do the linux implementation - if (gGLManager.mSoftwareBlendSSE) - { - // do SSE blend without binormals or extra texcoords - blend_SSE_32_32_batch(vert_offset, vert_count, (float*)gAGPVertices, - vertices, tcoords0, normals, weights); - } - else // fully backwards compatible software blending, no SSE -#endif - { - LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1; - F32 last_weight = F32_MAX; - LLMatrix3 gBlendRotMat; - - { - for (U32 index=vert_offset; index < vert_offset + vert_count; index++) - { - // blend by first matrix - F32 w = weights [index]; - - if (w != last_weight) - { - last_weight = w; - - S32 joint = llfloor(w); - w -= joint; - - LLMatrix4 &m0 = gJointMat[joint+1]; - LLMatrix4 &m1 = gJointMat[joint+0]; - LLMatrix3 &n0 = gJointRot[joint+1]; - LLMatrix3 &n1 = gJointRot[joint+0]; - - if (w == 1.0f) - { - gBlendMat = m0; - gBlendRotMat = n0; - } - else - { - gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); - gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); - gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); - - gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); - gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); - gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); - - gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); - gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); - gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); - - gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); - gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); - gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); - - gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); - gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); - gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], 
n0.mMatrix[VX][VZ], w); - - gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); - gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); - gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); - - gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); - gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); - gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); - } - } - - // write result - o_vertices [index] = vertices[index] * gBlendMat; - o_normals [index] = normals [index] * gBlendRotMat; - o_texcoords0[index] = tcoords0[index]; - - /* - // Verification code. Leave this here. It's useful for keeping the SSE and non-SSE versions in sync. - LLVector3 temp; - temp = tpos0; - if( (o_vertices[index] - temp).magVecSquared() > 0.001f ) - { - llerrs << "V SSE: " << o_vertices[index] << " v. " << temp << llendl; - } - - temp = tnorm0; - if( (o_normals[index] - temp).magVecSquared() > 0.001f ) - { - llerrs << "N SSE: " << o_normals[index] << " v. " << temp << llendl; - } - - if( (o_texcoords0[index] - tcoords0[index]).magVecSquared() > 0.001f ) - { - llerrs << "T0 SSE: " << o_texcoords0[index] << " v. " << tcoords0[index] << llendl; - } - */ - } - } - } - -#if LL_DARWIN - // *HACK* *CHOKE* *PUKE* - // No way does this belong here. - glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * vert_count, gAGPVertices + (AVATAR_VERTEX_BYTES * vert_offset)); -#endif - glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); // draw it! + glDrawElements(mode,count,type,indices); } else { - glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); + glDrawRangeElements(mode,start,end,count,type,indices); } } - - -//-------------------------------------------------------------------- -// DrawElements - -// works just like glDrawElements, except it assumes GL_TRIANGLES and GL_UNSIGNED_INT indices - -// why? because the destination buffer may not be the AGP buffer and the eyes do not use blending -// separate the eyes into their own drawpools and this code goes away. 
- -//-------------------------------------------------------------------- - -void llDrawElements(const S32 count, const U32 *indices, LLFace *face) -{ - U8* gAGPVertices = gPipeline.bufferGetScratchMemory(); - - if (gAGPVertices) - { -#if LL_DARWIN - U32 minIndex = indices[0]; - U32 maxIndex = indices[0]; -#endif - { - LLStrider<LLVector3> vertices; - LLStrider<LLVector3> normals; - LLStrider<LLVector2> tcoords; - LLStrider<F32> weights; - - LLStrider<LLVector3> o_vertices; - LLStrider<LLVector3> o_normals; - LLStrider<LLVector2> o_texcoords0; - - LLDrawPool *pool = face->getPool(); - pool->getVertexStrider (vertices,0); - pool->getNormalStrider (normals, 0); - pool->getTexCoordStrider (tcoords, 0); - - o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES); - o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES); - o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES); - - for (S32 i=0; i < count; i++) - { - U32 index = indices[i]; - - o_vertices [index] = vertices[index]; - o_normals [index] = normals [index]; - o_texcoords0[index] = tcoords [index]; - -#if LL_DARWIN - maxIndex = llmax(index, maxIndex); - minIndex = llmin(index, minIndex); -#endif - } - } - -#if LL_DARWIN - // *HACK* *CHOKE* *PUKE* - // No way does this belong here. - glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * (maxIndex + 1 - minIndex), gAGPVertices + (AVATAR_VERTEX_BYTES * minIndex)); -#endif - - glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices); - } - else - { - glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices); - } -} - - //-------------------------------------------------------------------- // LLViewerJointMesh::drawShape() //-------------------------------------------------------------------- -U32 LLViewerJointMesh::drawShape( F32 pixelArea ) +U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass) { - if (!mValid || !mVisible) return 0; - - U32 triangle_count = 0; - - //---------------------------------------------------------------- - // if no mesh bail out now - //---------------------------------------------------------------- - if ( !mMesh || !mFace) + if (!mValid || !mMesh || !mFace || !mVisible || + mFace->mVertexBuffer.isNull() || + mMesh->getNumFaces() == 0) { return 0; } - //---------------------------------------------------------------- - // if we have no faces, bail out now - //---------------------------------------------------------------- - if ( mMesh->getNumFaces() == 0 ) - { - return 0; - } + U32 triangle_count = 0; stop_glerror(); //---------------------------------------------------------------- // setup current color //---------------------------------------------------------------- - if (gRenderForSelect) - { - S32 name = mFace->getDrawable() ? mFace->getDrawable()->getVObj()->mGLName : 0; - LLColor4U color((U8)(name >> 16), (U8)(name >> 8), (U8)name, 0xff); - LLColor4 color_float(color); - - glColor4f(color_float.mV[0], color_float.mV[1], color_float.mV[2], 1.f); - } - else + if (!gRenderForSelect) { if ((mFace->getPool()->getVertexShaderLevel() > 0)) { @@ -1150,7 +572,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) stop_glerror(); -// LLGLSSpecular specular(mSpecular, gRenderForSelect ? 0.0f : mShiny); LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), gRenderForSelect ? 
0.0f : mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0)); LLGLEnable texture_2d((gRenderForSelect && isTransparent()) ? GL_TEXTURE_2D : 0); @@ -1160,11 +581,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) //---------------------------------------------------------------- llassert( !(mTexture.notNull() && mLayerSet) ); // mutually exclusive - //GLuint test_image_name = 0; - - // - LLGLState force_alpha_test(GL_ALPHA_TEST, isTransparent()); - if (mTestImageName) { LLImageGL::bindExternalTexture( mTestImageName, 0, GL_TEXTURE_2D ); @@ -1217,11 +633,12 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) gImageList.getImage(IMG_DEFAULT_AVATAR)->bind(); } + LLGLDisable tex(gRenderForSelect && !isTransparent() ? GL_TEXTURE_2D : 0); + if (gRenderForSelect) { if (isTransparent()) { - //gGLSObjectSelectDepthAlpha.set(); glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_ARB); glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_REPLACE); glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA_ARB, GL_MODULATE); @@ -1232,19 +649,14 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); // GL_TEXTURE_ENV_COLOR is set in renderPass1 glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); } - else - { - //gGLSObjectSelectDepth.set(); - } } else { //---------------------------------------------------------------- // by default, backface culling is enabled //---------------------------------------------------------------- - if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER) + /*if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER) { - //LLGLSPipelineAvatar gls_pipeline_avatar; LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); glClientActiveTextureARB(GL_TEXTURE0_ARB); @@ -1284,7 +696,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) } else if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_OUTER) { - //gGLSPipelineAvatarAlphaPass1.set(); glAlphaFunc(GL_GREATER, 0.1f); LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); @@ -1315,81 +726,48 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); - } - else if ( isTransparent()) - { - //gGLSNoCullFaces.set(); - } - else - { - //gGLSCullFaces.set(); - } + }*/ } - if (mMesh->hasWeights()) - { - uploadJointMatrices(); + mFace->mVertexBuffer->setBuffer(sRenderMask); + U32 start = mMesh->mFaceVertexOffset; + U32 end = start + mMesh->mFaceVertexCount - 1; + U32 count = mMesh->mFaceIndexCount; + U32* indicesp = ((U32*) mFace->mVertexBuffer->getIndicesPointer()) + mMesh->mFaceIndexOffset; + if (mMesh->hasWeights()) + { if ((mFace->getPool()->getVertexShaderLevel() > 0)) { - glMatrixMode(GL_MODELVIEW); - glPushMatrix(); - glLoadIdentity(); - - glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices()); - - glPopMatrix(); + if (first_pass) + { + uploadJointMatrices(); + } + llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); } else { - if (mFace->getGeomIndex() < 0) - { - llerrs << "Invalid geometry index in LLViewerJointMesh::drawShape() " << mFace->getGeomIndex() << llendl; - } - - if ((S32)(mMesh->mFaceVertexOffset + mMesh->mFaceVertexCount) > mFace->getGeomCount()) - { - ((LLVOAvatar*)mFace->getDrawable()->getVObj())->mRoot.dump(); - llerrs << "Rendering outside of vertex bounds with mesh " << mName << " at pixel area " << pixelArea << llendl; - } - 
llDrawElementsBatchBlend(mMesh->mFaceVertexOffset, mMesh->mFaceVertexCount, - mFace, mMesh->mFaceIndexCount, mMesh->getIndices()); + llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); } - } else { glPushMatrix(); LLMatrix4 jointToWorld = getWorldMatrix(); - jointToWorld *= gCamera->getModelview(); - glLoadMatrixf((GLfloat*)jointToWorld.mMatrix); - - if ((mFace->getPool()->getVertexShaderLevel() > 0)) - { - glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices()); - } - else // this else clause handles non-weighted vertices. llDrawElements just copies and draws - { - llDrawElements(mMesh->mFaceIndexCount, mMesh->getIndices(), mFace); - } - + glMultMatrixf((GLfloat*)jointToWorld.mMatrix); + llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); glPopMatrix(); } triangle_count += mMesh->mFaceIndexCount; - - if (gRenderForSelect) - { - glColor4fv(mColor.mV); - } - + if (mTestImageName) { glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); } - if (sRenderPass != AVATAR_RENDER_PASS_SINGLE) + /*if (sRenderPass != AVATAR_RENDER_PASS_SINGLE) { LLImageGL::unbindTexture(1, GL_TEXTURE_2D); glActiveTextureARB(GL_TEXTURE1_ARB); @@ -1402,7 +780,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_MODULATE); glAlphaFunc(GL_GREATER, 0.01f); - } + }*/ if (mTexture.notNull()) { if (!mTexture->getClampS()) { @@ -1419,19 +797,20 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) //----------------------------------------------------------------------------- // updateFaceSizes() //----------------------------------------------------------------------------- -void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area) +void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area) { // Do a pre-alloc pass to determine sizes of data. if (mMesh && mValid) { mMesh->mFaceVertexOffset = num_vertices; mMesh->mFaceVertexCount = mMesh->getNumVertices(); + mMesh->mFaceIndexOffset = num_indices; + mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices; + mMesh->getReferenceMesh()->mCurVertexCount = mMesh->mFaceVertexCount; - num_vertices += mMesh->getNumVertices(); - mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices; - - mMesh->getSharedData()->genIndices(mMesh->mFaceVertexOffset); + num_vertices += mMesh->getNumVertices(); + num_indices += mMesh->mFaceIndexCount; } } @@ -1441,9 +820,7 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area) void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind) { U32 i; - - if (!mValid) return; - + mFace = face; LLStrider<LLVector3> verticesp; @@ -1452,13 +829,15 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w LLStrider<LLVector2> tex_coordsp; LLStrider<F32> vertex_weightsp; LLStrider<LLVector4> clothing_weightsp; + LLStrider<U32> indicesp; // Copy data into the faces from the polymesh data. 
- if (mMesh) + if (mMesh && mValid) { if (mMesh->getNumVertices()) { S32 index = face->getGeometryAvatar(verticesp, normalsp, binormalsp, tex_coordsp, vertex_weightsp, clothing_weightsp); + face->mVertexBuffer->getIndexStrider(indicesp); if (-1 == index) { @@ -1474,11 +853,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i); if (damp_wind) { - clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(0,0,0,0); + clothing_weightsp[mMesh->mFaceVertexOffset + i] = LLVector4(0,0,0,0); } else { - clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(*(mMesh->getClothingWeights() + i)); + clothing_weightsp[mMesh->mFaceVertexOffset + i] = (*(mMesh->getClothingWeights() + i)); + } + } + + for (S32 i = 0; i < mMesh->getNumFaces(); i++) + { + for (U32 j = 0; j < 3; j++) + { + U32 k = i*3+j+mMesh->mFaceIndexOffset; + indicesp[k] = mMesh->getFaces()[i][j] + mMesh->mFaceVertexOffset; } } } @@ -1495,6 +883,92 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate) return (valid != activate); } +void LLViewerJointMesh::updateGeometry() +{ + if (mValid && mMesh && mFace && + mMesh->hasWeights() && + mFace->mVertexBuffer.notNull() && + gPipeline.getVertexShaderLevel(LLPipeline::SHADER_AVATAR) == 0) + { + uploadJointMatrices(); + LLStrider<LLVector3> o_vertices; + LLStrider<LLVector3> o_normals; + + //get vertex and normal striders + LLVertexBuffer *buffer = mFace->mVertexBuffer; + buffer->getVertexStrider(o_vertices, 0); + buffer->getNormalStrider(o_normals, 0); + + { + LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1; + F32 last_weight = F32_MAX; + LLMatrix3 gBlendRotMat; + + + for (U32 index= 0; index < mMesh->getNumVertices(); index++) + { + // blend by first matrix + F32 w = mMesh->getWeights()[index]; + + if (w != last_weight) + { + last_weight = w; + + S32 joint = llfloor(w); + w -= joint; + + LLMatrix4 &m0 = gJointMat[joint+1]; + LLMatrix4 &m1 = gJointMat[joint+0]; + LLMatrix3 &n0 = gJointRot[joint+1]; + LLMatrix3 &n1 = gJointRot[joint+0]; + + if (w == 1.0f) + { + gBlendMat = m0; + gBlendRotMat = n0; + } + else + { + gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); + gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); + gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); + + gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); + gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); + gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); + + gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); + gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); + gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); + + gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); + gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); + gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); + + gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); + gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); + gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); + + gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); + gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); + 
gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); + + gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); + gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); + gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); + } + } + + // write result + U32 bidx = index + mMesh->mFaceVertexOffset; + + o_vertices[bidx] = mMesh->getCoords()[index] * gBlendMat; + o_normals[bidx] = mMesh->getNormals()[index] * gBlendRotMat; + } + } + } +} + void LLViewerJointMesh::dump() { if (mValid) |
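
The new llDrawRangeElements() helper above guards glDrawRangeElements() with the driver-reported limits in gGLManager (mGLMaxVertexRange / mGLMaxIndexRange) and falls back to plain glDrawElements() when a mesh exceeds them. The sketch below shows the same guard in isolation; querying GL_MAX_ELEMENTS_VERTICES / GL_MAX_ELEMENTS_INDICES inline is an assumption about where gGLManager's cached values come from, and the function name is hypothetical.

```cpp
// Assumes GL 1.2+ headers/entry points are available (glDrawRangeElements,
// GL_MAX_ELEMENTS_VERTICES/INDICES); on some platforms these come from an
// extension loader rather than <GL/gl.h>.
#include <GL/gl.h>

void draw_range_elements_safe(GLenum mode, GLuint start, GLuint end,
                              GLsizei count, GLenum type, const GLvoid* indices)
{
    GLint max_vertices = 0;
    GLint max_indices  = 0;
    glGetIntegerv(GL_MAX_ELEMENTS_VERTICES, &max_vertices); // driver's preferred vertex range
    glGetIntegerv(GL_MAX_ELEMENTS_INDICES,  &max_indices);  // driver's preferred index count

    if (end - start + 1 > (GLuint)max_vertices || count > max_indices)
    {
        // Too big for the driver's fast path: drop the range hint entirely.
        glDrawElements(mode, count, type, indices);
    }
    else
    {
        glDrawRangeElements(mode, start, end, count, type, indices);
    }
}
```

In a real renderer the two limits would be queried once at startup and cached, which appears to be what gGLManager does, rather than calling glGetIntegerv on every draw.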