diff options
Diffstat (limited to 'indra')
| -rw-r--r-- | indra/newview/llpolymesh.cpp | 26 | ||||
| -rw-r--r-- | indra/newview/llpolymesh.h | 12 | ||||
| -rw-r--r-- | indra/newview/llpolymorph.cpp | 13 | ||||
| -rw-r--r-- | indra/newview/llviewerjointmesh.cpp | 145 | ||||
| -rw-r--r-- | indra/newview/llviewerjointmesh_vec.cpp | 2 | 
5 files changed, 101 insertions, 97 deletions
| diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index d5a2d66bcf..b8bdbfb2f8 100644 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -708,15 +708,17 @@ LLPolyMesh::LLPolyMesh(LLPolyMeshSharedData *shared_data, LLPolyMesh *reference_  		mClothingWeights = reference_mesh->mClothingWeights;  	}  	else -	{ +	{ 	   #if 1	// Allocate memory without initializing every vector  		// NOTE: This makes asusmptions about the size of LLVector[234]  		int nverts = mSharedData->mNumVertices; -		int nfloats = nverts * (3*5 + 2 + 4); -		mVertexData = new F32[nfloats]; +		int nfloats = nverts * (2*4 + 3*3 + 2 + 4); + +		//use aligned vertex data to make LLPolyMesh SSE friendly +		mVertexData = (F32*) _mm_malloc(nfloats*4, 16);  		int offset = 0; -		mCoords = 				(LLVector3*)(mVertexData + offset); offset += 3*nverts; -		mNormals = 				(LLVector3*)(mVertexData + offset); offset += 3*nverts; +		mCoords = 				(LLVector4*)(mVertexData + offset); offset += 4*nverts; +		mNormals = 				(LLVector4*)(mVertexData + offset); offset += 4*nverts;  		mScaledNormals = 		(LLVector3*)(mVertexData + offset); offset += 3*nverts;  		mBinormals = 			(LLVector3*)(mVertexData + offset); offset += 3*nverts;  		mScaledBinormals = 		(LLVector3*)(mVertexData + offset); offset += 3*nverts; @@ -757,7 +759,7 @@ LLPolyMesh::~LLPolyMesh()  	delete [] mClothingWeights;  	delete [] mTexCoords;  #else -	delete [] mVertexData; +	_mm_free(mVertexData);  #endif  } @@ -864,7 +866,7 @@ void LLPolyMesh::dumpDiagInfo()  //-----------------------------------------------------------------------------  // getWritableCoords()  //----------------------------------------------------------------------------- -LLVector3 *LLPolyMesh::getWritableCoords() +LLVector4 *LLPolyMesh::getWritableCoords()  {  	return mCoords;  } @@ -872,7 +874,7 @@ LLVector3 *LLPolyMesh::getWritableCoords()  //-----------------------------------------------------------------------------  // getWritableNormals()  //----------------------------------------------------------------------------- -LLVector3 *LLPolyMesh::getWritableNormals() +LLVector4 *LLPolyMesh::getWritableNormals()  {  	return mNormals;  } @@ -927,8 +929,12 @@ void LLPolyMesh::initializeForMorph()  	if (!mSharedData)  		return; -	memcpy(mCoords, mSharedData->mBaseCoords, sizeof(LLVector3) * mSharedData->mNumVertices);	/*Flawfinder: ignore*/ -	memcpy(mNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices);	/*Flawfinder: ignore*/ +	for (U32 i = 0; i < mSharedData->mNumVertices; ++i) +	{ +		mCoords[i] = LLVector4(mSharedData->mBaseCoords[i]); +		mNormals[i] = LLVector4(mSharedData->mBaseNormals[i]); +	} +  	memcpy(mScaledNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices);	/*Flawfinder: ignore*/  	memcpy(mBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices);	/*Flawfinder: ignore*/  	memcpy(mScaledBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices);		/*Flawfinder: ignore*/ diff --git a/indra/newview/llpolymesh.h b/indra/newview/llpolymesh.h index c2e5451dfe..d86568a1ba 100644 --- a/indra/newview/llpolymesh.h +++ b/indra/newview/llpolymesh.h @@ -223,15 +223,15 @@ public:  	}  	// Get coords -	const LLVector3	*getCoords() const{ +	const LLVector4	*getCoords() const{  		return mCoords;  	}  	// non const version -	LLVector3 *getWritableCoords(); +	LLVector4 *getWritableCoords();  	// Get normals -	const LLVector3	*getNormals() const{  +	const LLVector4	*getNormals() const{   		return mNormals;   	} @@ -253,7 +253,7 @@ public:  	}  	// intermediate morphed normals and output normals -	LLVector3 *getWritableNormals(); +	LLVector4 *getWritableNormals();  	LLVector3 *getScaledNormals();  	LLVector3 *getWritableBinormals(); @@ -347,11 +347,11 @@ protected:  	// Single array of floats for allocation / deletion  	F32						*mVertexData;  	// deformed vertices (resulting from application of morph targets) -	LLVector3				*mCoords; +	LLVector4				*mCoords;  	// deformed normals (resulting from application of morph targets)  	LLVector3				*mScaledNormals;  	// output normals (after normalization) -	LLVector3				*mNormals; +	LLVector4				*mNormals;  	// deformed binormals (resulting from application of morph targets)  	LLVector3				*mScaledBinormals;  	// output binormals (after normalization) diff --git a/indra/newview/llpolymorph.cpp b/indra/newview/llpolymorph.cpp index 80983cad24..2058c351c4 100644 --- a/indra/newview/llpolymorph.cpp +++ b/indra/newview/llpolymorph.cpp @@ -461,10 +461,10 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )  	if (delta_weight != 0.f)  	{  		llassert(!mMesh->isLOD()); -		LLVector3 *coords = mMesh->getWritableCoords(); +		LLVector4 *coords = mMesh->getWritableCoords();  		LLVector3 *scaled_normals = mMesh->getScaledNormals(); -		LLVector3 *normals = mMesh->getWritableNormals(); +		LLVector4 *normals = mMesh->getWritableNormals();  		LLVector3 *scaled_binormals = mMesh->getScaledBinormals();  		LLVector3 *binormals = mMesh->getWritableBinormals(); @@ -484,7 +484,8 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )  				maskWeight = maskWeightArray[vert_index_morph];  			} -			coords[vert_index_mesh] += mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight; +			coords[vert_index_mesh] += LLVector4(mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight); +  			if (getInfo()->mIsClothingMorph && clothing_weights)  			{  				LLVector3 clothing_offset = mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight; @@ -499,7 +500,7 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )  			scaled_normals[vert_index_mesh] += mMorphData->mNormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR;  			LLVector3 normalized_normal = scaled_normals[vert_index_mesh];  			normalized_normal.normVec(); -			normals[vert_index_mesh] = normalized_normal; +			normals[vert_index_mesh] = LLVector4(normalized_normal);  			// calculate new binormals  			scaled_binormals[vert_index_mesh] += mMorphData->mBinormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR; @@ -548,7 +549,7 @@ void	LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S3  		if (maskWeights)  		{ -			LLVector3 *coords = mMesh->getWritableCoords(); +			LLVector4 *coords = mMesh->getWritableCoords();  			LLVector3 *scaled_normals = mMesh->getScaledNormals();  			LLVector3 *scaled_binormals = mMesh->getScaledBinormals();  			LLVector2 *tex_coords = mMesh->getWritableTexCoords(); @@ -559,7 +560,7 @@ void	LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S3  				S32 out_vert = mMorphData->mVertexIndices[vert];  				// remove effect of existing masked morph -				coords[out_vert] -= mMorphData->mCoords[vert] * lastMaskWeight; +				coords[out_vert] -= LLVector4(mMorphData->mCoords[vert]) * lastMaskWeight;  				scaled_normals[out_vert] -= mMorphData->mNormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR;  				scaled_binormals[out_vert] -= mMorphData->mBinormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR;  				tex_coords[out_vert] -= mMorphData->mTexCoords[vert] * lastMaskWeight; diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index deb3d8fd97..294dfdcb55 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -61,6 +61,7 @@  #include "v4math.h"  #include "m3math.h"  #include "m4math.h" +#include "llmatrix4a.h"  #if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS  extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; @@ -382,6 +383,7 @@ const S32 NUM_AXES = 3;  // pivot parent 0-n -- child = n+1  static LLMatrix4	gJointMatUnaligned[32]; +static LLMatrix4a	gJointMatAligned[32];  static LLMatrix3	gJointRotUnaligned[32];  static LLVector4	gJointPivot[32]; @@ -467,6 +469,14 @@ void LLViewerJointMesh::uploadJointMatrices()  		glUniform4fvARB(gAvatarMatrixParam, 45, mat);  		stop_glerror();  	} +	else +	{ +		//load gJointMatUnaligned into gJointMatAligned +		for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); ++joint_num) +		{ +			gJointMatAligned[joint_num].loadu(gJointMatUnaligned[joint_num]); +		} +	}  }  //-------------------------------------------------------------------- @@ -723,7 +733,7 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w  					v[0] = coords[0];   					v[1] = coords[1];   					v[2] = coords[2];		 -					coords += 3; +					coords += 4;  					v += skip;  				} @@ -732,12 +742,12 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w  					n[0] = normals[0];   					n[1] = normals[1];  					n[2] = normals[2]; -					normals += 3; +					normals += 4;  					n += skip;  				}  			}  			else -				{ +			{  				U32* __restrict tc = (U32*) tex_coordsp.get();  				U32* __restrict vw = (U32*) vertex_weightsp.get(); @@ -745,18 +755,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w  				do  				{ -					v[0] = *(coords++);  -					v[1] = *(coords++);  -					v[2] = *(coords++); +					v[0] = coords[0];  +					v[1] = coords[1];  +					v[2] = coords[2];		 +					coords += 4;  					v += skip;  					tc[0] = *(tex_coords++);   					tc[1] = *(tex_coords++);  					tc += skip; -					n[0] = *(normals++);  -					n[1] = *(normals++); -					n[2] = *(normals++); +					n[0] = normals[0];  +					n[1] = normals[1]; +					n[2] = normals[2]; +					normals += 4;  					n += skip;  					vw[0] = *(weights++); @@ -808,17 +820,17 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)  	LLStrider<LLVector3> o_normals;  	//get vertex and normal striders -	LLVertexBuffer *buffer = mFace->mVertexBuffer; +	LLVertexBuffer* buffer = mFace->mVertexBuffer;  	buffer->getVertexStrider(o_vertices,  0);  	buffer->getNormalStrider(o_normals,   0); -	F32 last_weight = F32_MAX; -	LLMatrix4 gBlendMat; -	LLMatrix3 gBlendRotMat; +	//F32 last_weight = F32_MAX; +	LLMatrix4a gBlendMat; + +	__restrict const F32* weights = mMesh->getWeights(); +	__restrict const LLVector4* coords = mMesh->getCoords(); +	__restrict const LLVector4* normals = mMesh->getNormals(); -	const F32* weights = mMesh->getWeights(); -	const LLVector3* coords = mMesh->getCoords(); -	const LLVector3* normals = mMesh->getNormals();  	for (U32 index = 0; index < mMesh->getNumVertices(); index++)  	{  		U32 bidx = index + mMesh->mFaceVertexOffset; @@ -826,71 +838,54 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)  		// blend by first matrix  		F32 w = weights[index];  +		LLVector4a coord; +		coord.load4a(coords[index].mV); + +		LLVector4a norm; +		norm.load4a(normals[index].mV); +  		// Maybe we don't have to change gBlendMat.  		// Profiles of a single-avatar scene on a Mac show this to be a very  		// common case.  JC -		if (w == last_weight) +		//if (w != last_weight)  		{ -			o_vertices[bidx] = coords[index] * gBlendMat; -			o_normals[bidx] = normals[index] * gBlendRotMat; -			continue; -		} -		 -		last_weight = w; +			//last_weight = w; -		S32 joint = llfloor(w); -		w -= joint; -		 -		// No lerp required in this case. -		if (w == 1.0f) -		{ -			gBlendMat = gJointMatUnaligned[joint+1]; -			o_vertices[bidx] = coords[index] * gBlendMat; -			gBlendRotMat = gJointRotUnaligned[joint+1]; -			o_normals[bidx] = normals[index] * gBlendRotMat; -			continue; +			S32 joint = llfloor(w); +			w -= joint; +				 +			 +			if (w >= 0.f) +			{ +				// Try to keep all the accesses to the matrix data as close +				// together as possible.  This function is a hot spot on the +				// Mac. JC +				gBlendMat.setLerp(gJointMatAligned[joint+0], +								  gJointMatAligned[joint+1], w); + +				LLVector4a res; +				gBlendMat.affineTransform(coord, res); +				o_vertices[bidx].setVec(res[0], res[1], res[2]); +				gBlendMat.rotate(norm, res); +				o_normals[bidx].setVec(res[0], res[1], res[2]); +			} +			else +			{  // No lerp required in this case. +				LLVector4a res; +				gJointMatAligned[joint].affineTransform(coord, res); +				o_vertices[bidx].setVec(res[0], res[1], res[2]); +				gJointMatAligned[joint].rotate(norm, res); +				o_normals[bidx].setVec(res[0], res[1], res[2]); +			}  		} -		 -		// Try to keep all the accesses to the matrix data as close -		// together as possible.  This function is a hot spot on the -		// Mac. JC -		LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; -		LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; -		 -		gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); -		gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); -		gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); - -		gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); -		gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); -		gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); - -		gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); -		gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); -		gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); - -		gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); -		gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); -		gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); - -		o_vertices[bidx] = coords[index] * gBlendMat; -		 -		LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; -		LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; -		 -		gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); -		gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); -		gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); - -		gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); -		gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); -		gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); - -		gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); -		gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); -		gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); -		 -		o_normals[bidx] = normals[index] * gBlendRotMat; +		/*else +		{ //weight didn't change +			LLVector4a res; +			gBlendMat.affineTransform(coord, res); +			o_vertices[bidx].setVec(res[0], res[1], res[2]); +			gBlendMat.rotate(norm, res); +			o_normals[bidx].setVec(res[0], res[1], res[2]); +		}*/  	}  	buffer->setBuffer(0); diff --git a/indra/newview/llviewerjointmesh_vec.cpp b/indra/newview/llviewerjointmesh_vec.cpp index 8fb9d1cf68..a1225c9d1c 100644 --- a/indra/newview/llviewerjointmesh_vec.cpp +++ b/indra/newview/llviewerjointmesh_vec.cpp @@ -52,6 +52,7 @@  // static  void LLViewerJointMesh::updateGeometryVectorized(LLFace *face, LLPolyMesh *mesh)  { +#if 0  	static LLV4Matrix4	sJointMat[32];  	LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData;  	S32 j, joint_num, joint_end = joint_data.count(); @@ -98,4 +99,5 @@ void LLViewerJointMesh::updateGeometryVectorized(LLFace *face, LLPolyMesh *mesh)  	}  	buffer->setBuffer(0); +#endif  } | 
