diff options
author | Dave Parks <davep@lindenlab.com> | 2010-05-20 02:22:26 -0500 |
---|---|---|
committer | Dave Parks <davep@lindenlab.com> | 2010-05-20 02:22:26 -0500 |
commit | f14215689244a65064158e475e4f41eb149d85b0 (patch) | |
tree | 34e18f86c0f8f62d176b88b1066afece63a5f891 /indra | |
parent | f41e2d3752646fd5ffdb9764b1d3434e68a9baeb (diff) |
Vectorized avatar vertex skinning.
Diffstat (limited to 'indra')
-rw-r--r-- | indra/newview/llpolymesh.cpp | 26 | ||||
-rw-r--r-- | indra/newview/llpolymesh.h | 12 | ||||
-rw-r--r-- | indra/newview/llpolymorph.cpp | 13 | ||||
-rw-r--r-- | indra/newview/llviewerjointmesh.cpp | 145 | ||||
-rw-r--r-- | indra/newview/llviewerjointmesh_vec.cpp | 2 |
5 files changed, 101 insertions, 97 deletions
diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index d5a2d66bcf..b8bdbfb2f8 100644 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -708,15 +708,17 @@ LLPolyMesh::LLPolyMesh(LLPolyMeshSharedData *shared_data, LLPolyMesh *reference_ mClothingWeights = reference_mesh->mClothingWeights; } else - { + { #if 1 // Allocate memory without initializing every vector // NOTE: This makes asusmptions about the size of LLVector[234] int nverts = mSharedData->mNumVertices; - int nfloats = nverts * (3*5 + 2 + 4); - mVertexData = new F32[nfloats]; + int nfloats = nverts * (2*4 + 3*3 + 2 + 4); + + //use aligned vertex data to make LLPolyMesh SSE friendly + mVertexData = (F32*) _mm_malloc(nfloats*4, 16); int offset = 0; - mCoords = (LLVector3*)(mVertexData + offset); offset += 3*nverts; - mNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; + mCoords = (LLVector4*)(mVertexData + offset); offset += 4*nverts; + mNormals = (LLVector4*)(mVertexData + offset); offset += 4*nverts; mScaledNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; mBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; mScaledBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; @@ -757,7 +759,7 @@ LLPolyMesh::~LLPolyMesh() delete [] mClothingWeights; delete [] mTexCoords; #else - delete [] mVertexData; + _mm_free(mVertexData); #endif } @@ -864,7 +866,7 @@ void LLPolyMesh::dumpDiagInfo() //----------------------------------------------------------------------------- // getWritableCoords() //----------------------------------------------------------------------------- -LLVector3 *LLPolyMesh::getWritableCoords() +LLVector4 *LLPolyMesh::getWritableCoords() { return mCoords; } @@ -872,7 +874,7 @@ LLVector3 *LLPolyMesh::getWritableCoords() //----------------------------------------------------------------------------- // getWritableNormals() //----------------------------------------------------------------------------- -LLVector3 *LLPolyMesh::getWritableNormals() +LLVector4 *LLPolyMesh::getWritableNormals() { return mNormals; } @@ -927,8 +929,12 @@ void LLPolyMesh::initializeForMorph() if (!mSharedData) return; - memcpy(mCoords, mSharedData->mBaseCoords, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ - memcpy(mNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ + for (U32 i = 0; i < mSharedData->mNumVertices; ++i) + { + mCoords[i] = LLVector4(mSharedData->mBaseCoords[i]); + mNormals[i] = LLVector4(mSharedData->mBaseNormals[i]); + } + memcpy(mScaledNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ memcpy(mBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ memcpy(mScaledBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/ diff --git a/indra/newview/llpolymesh.h b/indra/newview/llpolymesh.h index c2e5451dfe..d86568a1ba 100644 --- a/indra/newview/llpolymesh.h +++ b/indra/newview/llpolymesh.h @@ -223,15 +223,15 @@ public: } // Get coords - const LLVector3 *getCoords() const{ + const LLVector4 *getCoords() const{ return mCoords; } // non const version - LLVector3 *getWritableCoords(); + LLVector4 *getWritableCoords(); // Get normals - const LLVector3 *getNormals() const{ + const LLVector4 *getNormals() const{ return mNormals; } @@ -253,7 +253,7 @@ public: } // intermediate morphed normals and output normals - LLVector3 *getWritableNormals(); + LLVector4 *getWritableNormals(); LLVector3 *getScaledNormals(); LLVector3 *getWritableBinormals(); @@ -347,11 +347,11 @@ protected: // Single array of floats for allocation / deletion F32 *mVertexData; // deformed vertices (resulting from application of morph targets) - LLVector3 *mCoords; + LLVector4 *mCoords; // deformed normals (resulting from application of morph targets) LLVector3 *mScaledNormals; // output normals (after normalization) - LLVector3 *mNormals; + LLVector4 *mNormals; // deformed binormals (resulting from application of morph targets) LLVector3 *mScaledBinormals; // output binormals (after normalization) diff --git a/indra/newview/llpolymorph.cpp b/indra/newview/llpolymorph.cpp index 80983cad24..2058c351c4 100644 --- a/indra/newview/llpolymorph.cpp +++ b/indra/newview/llpolymorph.cpp @@ -461,10 +461,10 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) if (delta_weight != 0.f) { llassert(!mMesh->isLOD()); - LLVector3 *coords = mMesh->getWritableCoords(); + LLVector4 *coords = mMesh->getWritableCoords(); LLVector3 *scaled_normals = mMesh->getScaledNormals(); - LLVector3 *normals = mMesh->getWritableNormals(); + LLVector4 *normals = mMesh->getWritableNormals(); LLVector3 *scaled_binormals = mMesh->getScaledBinormals(); LLVector3 *binormals = mMesh->getWritableBinormals(); @@ -484,7 +484,8 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) maskWeight = maskWeightArray[vert_index_morph]; } - coords[vert_index_mesh] += mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight; + coords[vert_index_mesh] += LLVector4(mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight); + if (getInfo()->mIsClothingMorph && clothing_weights) { LLVector3 clothing_offset = mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight; @@ -499,7 +500,7 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) scaled_normals[vert_index_mesh] += mMorphData->mNormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR; LLVector3 normalized_normal = scaled_normals[vert_index_mesh]; normalized_normal.normVec(); - normals[vert_index_mesh] = normalized_normal; + normals[vert_index_mesh] = LLVector4(normalized_normal); // calculate new binormals scaled_binormals[vert_index_mesh] += mMorphData->mBinormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR; @@ -548,7 +549,7 @@ void LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S3 if (maskWeights) { - LLVector3 *coords = mMesh->getWritableCoords(); + LLVector4 *coords = mMesh->getWritableCoords(); LLVector3 *scaled_normals = mMesh->getScaledNormals(); LLVector3 *scaled_binormals = mMesh->getScaledBinormals(); LLVector2 *tex_coords = mMesh->getWritableTexCoords(); @@ -559,7 +560,7 @@ void LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S3 S32 out_vert = mMorphData->mVertexIndices[vert]; // remove effect of existing masked morph - coords[out_vert] -= mMorphData->mCoords[vert] * lastMaskWeight; + coords[out_vert] -= LLVector4(mMorphData->mCoords[vert]) * lastMaskWeight; scaled_normals[out_vert] -= mMorphData->mNormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR; scaled_binormals[out_vert] -= mMorphData->mBinormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR; tex_coords[out_vert] -= mMorphData->mTexCoords[vert] * lastMaskWeight; diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index deb3d8fd97..294dfdcb55 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -61,6 +61,7 @@ #include "v4math.h" #include "m3math.h" #include "m4math.h" +#include "llmatrix4a.h" #if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; @@ -382,6 +383,7 @@ const S32 NUM_AXES = 3; // pivot parent 0-n -- child = n+1 static LLMatrix4 gJointMatUnaligned[32]; +static LLMatrix4a gJointMatAligned[32]; static LLMatrix3 gJointRotUnaligned[32]; static LLVector4 gJointPivot[32]; @@ -467,6 +469,14 @@ void LLViewerJointMesh::uploadJointMatrices() glUniform4fvARB(gAvatarMatrixParam, 45, mat); stop_glerror(); } + else + { + //load gJointMatUnaligned into gJointMatAligned + for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); ++joint_num) + { + gJointMatAligned[joint_num].loadu(gJointMatUnaligned[joint_num]); + } + } } //-------------------------------------------------------------------- @@ -723,7 +733,7 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w v[0] = coords[0]; v[1] = coords[1]; v[2] = coords[2]; - coords += 3; + coords += 4; v += skip; } @@ -732,12 +742,12 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w n[0] = normals[0]; n[1] = normals[1]; n[2] = normals[2]; - normals += 3; + normals += 4; n += skip; } } else - { + { U32* __restrict tc = (U32*) tex_coordsp.get(); U32* __restrict vw = (U32*) vertex_weightsp.get(); @@ -745,18 +755,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w do { - v[0] = *(coords++); - v[1] = *(coords++); - v[2] = *(coords++); + v[0] = coords[0]; + v[1] = coords[1]; + v[2] = coords[2]; + coords += 4; v += skip; tc[0] = *(tex_coords++); tc[1] = *(tex_coords++); tc += skip; - n[0] = *(normals++); - n[1] = *(normals++); - n[2] = *(normals++); + n[0] = normals[0]; + n[1] = normals[1]; + n[2] = normals[2]; + normals += 4; n += skip; vw[0] = *(weights++); @@ -808,17 +820,17 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) LLStrider<LLVector3> o_normals; //get vertex and normal striders - LLVertexBuffer *buffer = mFace->mVertexBuffer; + LLVertexBuffer* buffer = mFace->mVertexBuffer; buffer->getVertexStrider(o_vertices, 0); buffer->getNormalStrider(o_normals, 0); - F32 last_weight = F32_MAX; - LLMatrix4 gBlendMat; - LLMatrix3 gBlendRotMat; + //F32 last_weight = F32_MAX; + LLMatrix4a gBlendMat; + + __restrict const F32* weights = mMesh->getWeights(); + __restrict const LLVector4* coords = mMesh->getCoords(); + __restrict const LLVector4* normals = mMesh->getNormals(); - const F32* weights = mMesh->getWeights(); - const LLVector3* coords = mMesh->getCoords(); - const LLVector3* normals = mMesh->getNormals(); for (U32 index = 0; index < mMesh->getNumVertices(); index++) { U32 bidx = index + mMesh->mFaceVertexOffset; @@ -826,71 +838,54 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) // blend by first matrix F32 w = weights[index]; + LLVector4a coord; + coord.load4a(coords[index].mV); + + LLVector4a norm; + norm.load4a(normals[index].mV); + // Maybe we don't have to change gBlendMat. // Profiles of a single-avatar scene on a Mac show this to be a very // common case. JC - if (w == last_weight) + //if (w != last_weight) { - o_vertices[bidx] = coords[index] * gBlendMat; - o_normals[bidx] = normals[index] * gBlendRotMat; - continue; - } - - last_weight = w; + //last_weight = w; - S32 joint = llfloor(w); - w -= joint; - - // No lerp required in this case. - if (w == 1.0f) - { - gBlendMat = gJointMatUnaligned[joint+1]; - o_vertices[bidx] = coords[index] * gBlendMat; - gBlendRotMat = gJointRotUnaligned[joint+1]; - o_normals[bidx] = normals[index] * gBlendRotMat; - continue; + S32 joint = llfloor(w); + w -= joint; + + + if (w >= 0.f) + { + // Try to keep all the accesses to the matrix data as close + // together as possible. This function is a hot spot on the + // Mac. JC + gBlendMat.setLerp(gJointMatAligned[joint+0], + gJointMatAligned[joint+1], w); + + LLVector4a res; + gBlendMat.affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gBlendMat.rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + } + else + { // No lerp required in this case. + LLVector4a res; + gJointMatAligned[joint].affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gJointMatAligned[joint].rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + } } - - // Try to keep all the accesses to the matrix data as close - // together as possible. This function is a hot spot on the - // Mac. JC - LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; - LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; - - gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); - gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); - gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); - - gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); - gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); - gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); - - gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); - gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); - gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); - - gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); - gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); - gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); - - o_vertices[bidx] = coords[index] * gBlendMat; - - LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; - LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; - - gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); - gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); - gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); - - gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); - gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); - gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); - - gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); - gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); - gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); - - o_normals[bidx] = normals[index] * gBlendRotMat; + /*else + { //weight didn't change + LLVector4a res; + gBlendMat.affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gBlendMat.rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + }*/ } buffer->setBuffer(0); diff --git a/indra/newview/llviewerjointmesh_vec.cpp b/indra/newview/llviewerjointmesh_vec.cpp index 8fb9d1cf68..a1225c9d1c 100644 --- a/indra/newview/llviewerjointmesh_vec.cpp +++ b/indra/newview/llviewerjointmesh_vec.cpp @@ -52,6 +52,7 @@ // static void LLViewerJointMesh::updateGeometryVectorized(LLFace *face, LLPolyMesh *mesh) { +#if 0 static LLV4Matrix4 sJointMat[32]; LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData; S32 j, joint_num, joint_end = joint_data.count(); @@ -98,4 +99,5 @@ void LLViewerJointMesh::updateGeometryVectorized(LLFace *face, LLPolyMesh *mesh) } buffer->setBuffer(0); +#endif } |