diff options
author | Dave Parks <davep@lindenlab.com> | 2010-05-20 02:22:26 -0500 |
---|---|---|
committer | Dave Parks <davep@lindenlab.com> | 2010-05-20 02:22:26 -0500 |
commit | f14215689244a65064158e475e4f41eb149d85b0 (patch) | |
tree | 34e18f86c0f8f62d176b88b1066afece63a5f891 /indra/newview/llviewerjointmesh.cpp | |
parent | f41e2d3752646fd5ffdb9764b1d3434e68a9baeb (diff) |
Vectorized avatar vertex skinning.
Diffstat (limited to 'indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- | indra/newview/llviewerjointmesh.cpp | 145 |
1 files changed, 70 insertions, 75 deletions
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index deb3d8fd97..294dfdcb55 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -61,6 +61,7 @@ #include "v4math.h" #include "m3math.h" #include "m4math.h" +#include "llmatrix4a.h" #if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; @@ -382,6 +383,7 @@ const S32 NUM_AXES = 3; // pivot parent 0-n -- child = n+1 static LLMatrix4 gJointMatUnaligned[32]; +static LLMatrix4a gJointMatAligned[32]; static LLMatrix3 gJointRotUnaligned[32]; static LLVector4 gJointPivot[32]; @@ -467,6 +469,14 @@ void LLViewerJointMesh::uploadJointMatrices() glUniform4fvARB(gAvatarMatrixParam, 45, mat); stop_glerror(); } + else + { + //load gJointMatUnaligned into gJointMatAligned + for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); ++joint_num) + { + gJointMatAligned[joint_num].loadu(gJointMatUnaligned[joint_num]); + } + } } //-------------------------------------------------------------------- @@ -723,7 +733,7 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w v[0] = coords[0]; v[1] = coords[1]; v[2] = coords[2]; - coords += 3; + coords += 4; v += skip; } @@ -732,12 +742,12 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w n[0] = normals[0]; n[1] = normals[1]; n[2] = normals[2]; - normals += 3; + normals += 4; n += skip; } } else - { + { U32* __restrict tc = (U32*) tex_coordsp.get(); U32* __restrict vw = (U32*) vertex_weightsp.get(); @@ -745,18 +755,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w do { - v[0] = *(coords++); - v[1] = *(coords++); - v[2] = *(coords++); + v[0] = coords[0]; + v[1] = coords[1]; + v[2] = coords[2]; + coords += 4; v += skip; tc[0] = *(tex_coords++); tc[1] = *(tex_coords++); tc += skip; - n[0] = *(normals++); - n[1] = *(normals++); - n[2] = *(normals++); + n[0] = normals[0]; + n[1] = normals[1]; + n[2] = normals[2]; + normals += 4; n += skip; vw[0] = *(weights++); @@ -808,17 +820,17 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) LLStrider<LLVector3> o_normals; //get vertex and normal striders - LLVertexBuffer *buffer = mFace->mVertexBuffer; + LLVertexBuffer* buffer = mFace->mVertexBuffer; buffer->getVertexStrider(o_vertices, 0); buffer->getNormalStrider(o_normals, 0); - F32 last_weight = F32_MAX; - LLMatrix4 gBlendMat; - LLMatrix3 gBlendRotMat; + //F32 last_weight = F32_MAX; + LLMatrix4a gBlendMat; + + __restrict const F32* weights = mMesh->getWeights(); + __restrict const LLVector4* coords = mMesh->getCoords(); + __restrict const LLVector4* normals = mMesh->getNormals(); - const F32* weights = mMesh->getWeights(); - const LLVector3* coords = mMesh->getCoords(); - const LLVector3* normals = mMesh->getNormals(); for (U32 index = 0; index < mMesh->getNumVertices(); index++) { U32 bidx = index + mMesh->mFaceVertexOffset; @@ -826,71 +838,54 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) // blend by first matrix F32 w = weights[index]; + LLVector4a coord; + coord.load4a(coords[index].mV); + + LLVector4a norm; + norm.load4a(normals[index].mV); + // Maybe we don't have to change gBlendMat. // Profiles of a single-avatar scene on a Mac show this to be a very // common case. JC - if (w == last_weight) + //if (w != last_weight) { - o_vertices[bidx] = coords[index] * gBlendMat; - o_normals[bidx] = normals[index] * gBlendRotMat; - continue; - } - - last_weight = w; + //last_weight = w; - S32 joint = llfloor(w); - w -= joint; - - // No lerp required in this case. - if (w == 1.0f) - { - gBlendMat = gJointMatUnaligned[joint+1]; - o_vertices[bidx] = coords[index] * gBlendMat; - gBlendRotMat = gJointRotUnaligned[joint+1]; - o_normals[bidx] = normals[index] * gBlendRotMat; - continue; + S32 joint = llfloor(w); + w -= joint; + + + if (w >= 0.f) + { + // Try to keep all the accesses to the matrix data as close + // together as possible. This function is a hot spot on the + // Mac. JC + gBlendMat.setLerp(gJointMatAligned[joint+0], + gJointMatAligned[joint+1], w); + + LLVector4a res; + gBlendMat.affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gBlendMat.rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + } + else + { // No lerp required in this case. + LLVector4a res; + gJointMatAligned[joint].affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gJointMatAligned[joint].rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + } } - - // Try to keep all the accesses to the matrix data as close - // together as possible. This function is a hot spot on the - // Mac. JC - LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; - LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; - - gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); - gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); - gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); - - gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); - gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); - gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); - - gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); - gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); - gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); - - gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); - gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); - gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); - - o_vertices[bidx] = coords[index] * gBlendMat; - - LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; - LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; - - gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); - gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); - gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); - - gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); - gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); - gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); - - gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); - gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); - gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); - - o_normals[bidx] = normals[index] * gBlendRotMat; + /*else + { //weight didn't change + LLVector4a res; + gBlendMat.affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gBlendMat.rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + }*/ } buffer->setBuffer(0); |