summaryrefslogtreecommitdiff
path: root/indra
diff options
context:
space:
mode:
authorDave Parks <davep@lindenlab.com>2010-05-20 02:22:26 -0500
committerDave Parks <davep@lindenlab.com>2010-05-20 02:22:26 -0500
commitf14215689244a65064158e475e4f41eb149d85b0 (patch)
tree34e18f86c0f8f62d176b88b1066afece63a5f891 /indra
parentf41e2d3752646fd5ffdb9764b1d3434e68a9baeb (diff)
Vectorized avatar vertex skinning.
Diffstat (limited to 'indra')
-rw-r--r--indra/newview/llpolymesh.cpp26
-rw-r--r--indra/newview/llpolymesh.h12
-rw-r--r--indra/newview/llpolymorph.cpp13
-rw-r--r--indra/newview/llviewerjointmesh.cpp145
-rw-r--r--indra/newview/llviewerjointmesh_vec.cpp2
5 files changed, 101 insertions, 97 deletions
diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp
index d5a2d66bcf..b8bdbfb2f8 100644
--- a/indra/newview/llpolymesh.cpp
+++ b/indra/newview/llpolymesh.cpp
@@ -708,15 +708,17 @@ LLPolyMesh::LLPolyMesh(LLPolyMeshSharedData *shared_data, LLPolyMesh *reference_
mClothingWeights = reference_mesh->mClothingWeights;
}
else
- {
+ {
#if 1 // Allocate memory without initializing every vector
// NOTE: This makes asusmptions about the size of LLVector[234]
int nverts = mSharedData->mNumVertices;
- int nfloats = nverts * (3*5 + 2 + 4);
- mVertexData = new F32[nfloats];
+ int nfloats = nverts * (2*4 + 3*3 + 2 + 4);
+
+ //use aligned vertex data to make LLPolyMesh SSE friendly
+ mVertexData = (F32*) _mm_malloc(nfloats*4, 16);
int offset = 0;
- mCoords = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
- mNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
+ mCoords = (LLVector4*)(mVertexData + offset); offset += 4*nverts;
+ mNormals = (LLVector4*)(mVertexData + offset); offset += 4*nverts;
mScaledNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
mBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
mScaledBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
@@ -757,7 +759,7 @@ LLPolyMesh::~LLPolyMesh()
delete [] mClothingWeights;
delete [] mTexCoords;
#else
- delete [] mVertexData;
+ _mm_free(mVertexData);
#endif
}
@@ -864,7 +866,7 @@ void LLPolyMesh::dumpDiagInfo()
//-----------------------------------------------------------------------------
// getWritableCoords()
//-----------------------------------------------------------------------------
-LLVector3 *LLPolyMesh::getWritableCoords()
+LLVector4 *LLPolyMesh::getWritableCoords()
{
return mCoords;
}
@@ -872,7 +874,7 @@ LLVector3 *LLPolyMesh::getWritableCoords()
//-----------------------------------------------------------------------------
// getWritableNormals()
//-----------------------------------------------------------------------------
-LLVector3 *LLPolyMesh::getWritableNormals()
+LLVector4 *LLPolyMesh::getWritableNormals()
{
return mNormals;
}
@@ -927,8 +929,12 @@ void LLPolyMesh::initializeForMorph()
if (!mSharedData)
return;
- memcpy(mCoords, mSharedData->mBaseCoords, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/
- memcpy(mNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/
+ for (U32 i = 0; i < mSharedData->mNumVertices; ++i)
+ {
+ mCoords[i] = LLVector4(mSharedData->mBaseCoords[i]);
+ mNormals[i] = LLVector4(mSharedData->mBaseNormals[i]);
+ }
+
memcpy(mScaledNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/
memcpy(mBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/
memcpy(mScaledBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices); /*Flawfinder: ignore*/
diff --git a/indra/newview/llpolymesh.h b/indra/newview/llpolymesh.h
index c2e5451dfe..d86568a1ba 100644
--- a/indra/newview/llpolymesh.h
+++ b/indra/newview/llpolymesh.h
@@ -223,15 +223,15 @@ public:
}
// Get coords
- const LLVector3 *getCoords() const{
+ const LLVector4 *getCoords() const{
return mCoords;
}
// non const version
- LLVector3 *getWritableCoords();
+ LLVector4 *getWritableCoords();
// Get normals
- const LLVector3 *getNormals() const{
+ const LLVector4 *getNormals() const{
return mNormals;
}
@@ -253,7 +253,7 @@ public:
}
// intermediate morphed normals and output normals
- LLVector3 *getWritableNormals();
+ LLVector4 *getWritableNormals();
LLVector3 *getScaledNormals();
LLVector3 *getWritableBinormals();
@@ -347,11 +347,11 @@ protected:
// Single array of floats for allocation / deletion
F32 *mVertexData;
// deformed vertices (resulting from application of morph targets)
- LLVector3 *mCoords;
+ LLVector4 *mCoords;
// deformed normals (resulting from application of morph targets)
LLVector3 *mScaledNormals;
// output normals (after normalization)
- LLVector3 *mNormals;
+ LLVector4 *mNormals;
// deformed binormals (resulting from application of morph targets)
LLVector3 *mScaledBinormals;
// output binormals (after normalization)
diff --git a/indra/newview/llpolymorph.cpp b/indra/newview/llpolymorph.cpp
index 80983cad24..2058c351c4 100644
--- a/indra/newview/llpolymorph.cpp
+++ b/indra/newview/llpolymorph.cpp
@@ -461,10 +461,10 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )
if (delta_weight != 0.f)
{
llassert(!mMesh->isLOD());
- LLVector3 *coords = mMesh->getWritableCoords();
+ LLVector4 *coords = mMesh->getWritableCoords();
LLVector3 *scaled_normals = mMesh->getScaledNormals();
- LLVector3 *normals = mMesh->getWritableNormals();
+ LLVector4 *normals = mMesh->getWritableNormals();
LLVector3 *scaled_binormals = mMesh->getScaledBinormals();
LLVector3 *binormals = mMesh->getWritableBinormals();
@@ -484,7 +484,8 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )
maskWeight = maskWeightArray[vert_index_morph];
}
- coords[vert_index_mesh] += mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight;
+ coords[vert_index_mesh] += LLVector4(mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight);
+
if (getInfo()->mIsClothingMorph && clothing_weights)
{
LLVector3 clothing_offset = mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight;
@@ -499,7 +500,7 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )
scaled_normals[vert_index_mesh] += mMorphData->mNormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR;
LLVector3 normalized_normal = scaled_normals[vert_index_mesh];
normalized_normal.normVec();
- normals[vert_index_mesh] = normalized_normal;
+ normals[vert_index_mesh] = LLVector4(normalized_normal);
// calculate new binormals
scaled_binormals[vert_index_mesh] += mMorphData->mBinormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR;
@@ -548,7 +549,7 @@ void LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S3
if (maskWeights)
{
- LLVector3 *coords = mMesh->getWritableCoords();
+ LLVector4 *coords = mMesh->getWritableCoords();
LLVector3 *scaled_normals = mMesh->getScaledNormals();
LLVector3 *scaled_binormals = mMesh->getScaledBinormals();
LLVector2 *tex_coords = mMesh->getWritableTexCoords();
@@ -559,7 +560,7 @@ void LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S3
S32 out_vert = mMorphData->mVertexIndices[vert];
// remove effect of existing masked morph
- coords[out_vert] -= mMorphData->mCoords[vert] * lastMaskWeight;
+ coords[out_vert] -= LLVector4(mMorphData->mCoords[vert]) * lastMaskWeight;
scaled_normals[out_vert] -= mMorphData->mNormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR;
scaled_binormals[out_vert] -= mMorphData->mBinormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR;
tex_coords[out_vert] -= mMorphData->mTexCoords[vert] * lastMaskWeight;
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index deb3d8fd97..294dfdcb55 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -61,6 +61,7 @@
#include "v4math.h"
#include "m3math.h"
#include "m4math.h"
+#include "llmatrix4a.h"
#if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS
extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB;
@@ -382,6 +383,7 @@ const S32 NUM_AXES = 3;
// pivot parent 0-n -- child = n+1
static LLMatrix4 gJointMatUnaligned[32];
+static LLMatrix4a gJointMatAligned[32];
static LLMatrix3 gJointRotUnaligned[32];
static LLVector4 gJointPivot[32];
@@ -467,6 +469,14 @@ void LLViewerJointMesh::uploadJointMatrices()
glUniform4fvARB(gAvatarMatrixParam, 45, mat);
stop_glerror();
}
+ else
+ {
+ //load gJointMatUnaligned into gJointMatAligned
+ for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); ++joint_num)
+ {
+ gJointMatAligned[joint_num].loadu(gJointMatUnaligned[joint_num]);
+ }
+ }
}
//--------------------------------------------------------------------
@@ -723,7 +733,7 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
v[0] = coords[0];
v[1] = coords[1];
v[2] = coords[2];
- coords += 3;
+ coords += 4;
v += skip;
}
@@ -732,12 +742,12 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
n[0] = normals[0];
n[1] = normals[1];
n[2] = normals[2];
- normals += 3;
+ normals += 4;
n += skip;
}
}
else
- {
+ {
U32* __restrict tc = (U32*) tex_coordsp.get();
U32* __restrict vw = (U32*) vertex_weightsp.get();
@@ -745,18 +755,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
do
{
- v[0] = *(coords++);
- v[1] = *(coords++);
- v[2] = *(coords++);
+ v[0] = coords[0];
+ v[1] = coords[1];
+ v[2] = coords[2];
+ coords += 4;
v += skip;
tc[0] = *(tex_coords++);
tc[1] = *(tex_coords++);
tc += skip;
- n[0] = *(normals++);
- n[1] = *(normals++);
- n[2] = *(normals++);
+ n[0] = normals[0];
+ n[1] = normals[1];
+ n[2] = normals[2];
+ normals += 4;
n += skip;
vw[0] = *(weights++);
@@ -808,17 +820,17 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
LLStrider<LLVector3> o_normals;
//get vertex and normal striders
- LLVertexBuffer *buffer = mFace->mVertexBuffer;
+ LLVertexBuffer* buffer = mFace->mVertexBuffer;
buffer->getVertexStrider(o_vertices, 0);
buffer->getNormalStrider(o_normals, 0);
- F32 last_weight = F32_MAX;
- LLMatrix4 gBlendMat;
- LLMatrix3 gBlendRotMat;
+ //F32 last_weight = F32_MAX;
+ LLMatrix4a gBlendMat;
+
+ __restrict const F32* weights = mMesh->getWeights();
+ __restrict const LLVector4* coords = mMesh->getCoords();
+ __restrict const LLVector4* normals = mMesh->getNormals();
- const F32* weights = mMesh->getWeights();
- const LLVector3* coords = mMesh->getCoords();
- const LLVector3* normals = mMesh->getNormals();
for (U32 index = 0; index < mMesh->getNumVertices(); index++)
{
U32 bidx = index + mMesh->mFaceVertexOffset;
@@ -826,71 +838,54 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
// blend by first matrix
F32 w = weights[index];
+ LLVector4a coord;
+ coord.load4a(coords[index].mV);
+
+ LLVector4a norm;
+ norm.load4a(normals[index].mV);
+
// Maybe we don't have to change gBlendMat.
// Profiles of a single-avatar scene on a Mac show this to be a very
// common case. JC
- if (w == last_weight)
+ //if (w != last_weight)
{
- o_vertices[bidx] = coords[index] * gBlendMat;
- o_normals[bidx] = normals[index] * gBlendRotMat;
- continue;
- }
-
- last_weight = w;
+ //last_weight = w;
- S32 joint = llfloor(w);
- w -= joint;
-
- // No lerp required in this case.
- if (w == 1.0f)
- {
- gBlendMat = gJointMatUnaligned[joint+1];
- o_vertices[bidx] = coords[index] * gBlendMat;
- gBlendRotMat = gJointRotUnaligned[joint+1];
- o_normals[bidx] = normals[index] * gBlendRotMat;
- continue;
+ S32 joint = llfloor(w);
+ w -= joint;
+
+
+ if (w >= 0.f)
+ {
+ // Try to keep all the accesses to the matrix data as close
+ // together as possible. This function is a hot spot on the
+ // Mac. JC
+ gBlendMat.setLerp(gJointMatAligned[joint+0],
+ gJointMatAligned[joint+1], w);
+
+ LLVector4a res;
+ gBlendMat.affineTransform(coord, res);
+ o_vertices[bidx].setVec(res[0], res[1], res[2]);
+ gBlendMat.rotate(norm, res);
+ o_normals[bidx].setVec(res[0], res[1], res[2]);
+ }
+ else
+ { // No lerp required in this case.
+ LLVector4a res;
+ gJointMatAligned[joint].affineTransform(coord, res);
+ o_vertices[bidx].setVec(res[0], res[1], res[2]);
+ gJointMatAligned[joint].rotate(norm, res);
+ o_normals[bidx].setVec(res[0], res[1], res[2]);
+ }
}
-
- // Try to keep all the accesses to the matrix data as close
- // together as possible. This function is a hot spot on the
- // Mac. JC
- LLMatrix4 &m0 = gJointMatUnaligned[joint+1];
- LLMatrix4 &m1 = gJointMatUnaligned[joint+0];
-
- gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
- gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
- gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w);
-
- gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w);
- gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w);
- gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w);
-
- gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w);
- gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w);
- gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w);
-
- gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w);
- gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w);
- gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w);
-
- o_vertices[bidx] = coords[index] * gBlendMat;
-
- LLMatrix3 &n0 = gJointRotUnaligned[joint+1];
- LLMatrix3 &n1 = gJointRotUnaligned[joint+0];
-
- gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
- gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
- gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w);
-
- gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w);
- gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w);
- gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w);
-
- gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w);
- gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w);
- gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w);
-
- o_normals[bidx] = normals[index] * gBlendRotMat;
+ /*else
+ { //weight didn't change
+ LLVector4a res;
+ gBlendMat.affineTransform(coord, res);
+ o_vertices[bidx].setVec(res[0], res[1], res[2]);
+ gBlendMat.rotate(norm, res);
+ o_normals[bidx].setVec(res[0], res[1], res[2]);
+ }*/
}
buffer->setBuffer(0);
diff --git a/indra/newview/llviewerjointmesh_vec.cpp b/indra/newview/llviewerjointmesh_vec.cpp
index 8fb9d1cf68..a1225c9d1c 100644
--- a/indra/newview/llviewerjointmesh_vec.cpp
+++ b/indra/newview/llviewerjointmesh_vec.cpp
@@ -52,6 +52,7 @@
// static
void LLViewerJointMesh::updateGeometryVectorized(LLFace *face, LLPolyMesh *mesh)
{
+#if 0
static LLV4Matrix4 sJointMat[32];
LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData;
S32 j, joint_num, joint_end = joint_data.count();
@@ -98,4 +99,5 @@ void LLViewerJointMesh::updateGeometryVectorized(LLFace *face, LLPolyMesh *mesh)
}
buffer->setBuffer(0);
+#endif
}