summaryrefslogtreecommitdiff
path: root/indra/newview/llviewerjointmesh.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/newview/llviewerjointmesh.cpp')
-rw-r--r--indra/newview/llviewerjointmesh.cpp524
1 files changed, 144 insertions, 380 deletions
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index e59e685f53..e052e37393 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -55,6 +55,7 @@
#include "v4math.h"
#include "m3math.h"
#include "m4math.h"
+#include "llmatrix4a.h"
#if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS
extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB;
@@ -62,7 +63,6 @@ extern PFNGLWEIGHTFVARBPROC glWeightfvARB;
extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB;
#endif
-static LLPointer<LLVertexBuffer> sRenderBuffer = NULL;
static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX |
LLVertexBuffer::MAP_NORMAL |
LLVertexBuffer::MAP_TEXCOORD0;
@@ -375,6 +375,7 @@ const S32 NUM_AXES = 3;
// pivot parent 0-n -- child = n+1
static LLMatrix4 gJointMatUnaligned[32];
+static LLMatrix4a gJointMatAligned[32];
static LLMatrix3 gJointRotUnaligned[32];
static LLVector4 gJointPivot[32];
@@ -457,9 +458,20 @@ void LLViewerJointMesh::uploadJointMatrices()
}
}
stop_glerror();
- glUniform4fvARB(gAvatarMatrixParam, 45, mat);
+ if (LLGLSLShader::sCurBoundShaderPtr)
+ {
+ LLGLSLShader::sCurBoundShaderPtr->uniform4fv(LLViewerShaderMgr::AVATAR_MATRIX, 45, mat);
+ }
stop_glerror();
}
+ else
+ {
+ //load gJointMatUnaligned into gJointMatAligned
+ for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); ++joint_num)
+ {
+ gJointMatAligned[joint_num].loadu(gJointMatUnaligned[joint_num]);
+ }
+ }
}
//--------------------------------------------------------------------
@@ -501,27 +513,30 @@ int compare_int(const void *a, const void *b)
U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
{
if (!mValid || !mMesh || !mFace || !mVisible ||
- mFace->mVertexBuffer.isNull() ||
- mMesh->getNumFaces() == 0)
+ !mFace->getVertexBuffer() ||
+ mMesh->getNumFaces() == 0 ||
+ (LLGLSLShader::sNoFixedFunction && LLGLSLShader::sCurBoundShaderPtr == NULL))
{
return 0;
}
U32 triangle_count = 0;
+ S32 diffuse_channel = LLDrawPoolAvatar::sDiffuseChannel;
+
stop_glerror();
//----------------------------------------------------------------
// setup current color
//----------------------------------------------------------------
if (is_dummy)
- glColor4fv(LLVOAvatar::getDummyColor().mV);
+ gGL.diffuseColor4fv(LLVOAvatar::getDummyColor().mV);
else
- glColor4fv(mColor.mV);
+ gGL.diffuseColor4fv(mColor.mV);
stop_glerror();
- LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0));
+ LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), (mFace->getPool()->getVertexShaderLevel() > 0 || LLGLSLShader::sNoFixedFunction) ? 0.f : mShiny);
//----------------------------------------------------------------
// setup current texture
@@ -531,33 +546,27 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
LLTexUnit::eTextureAddressMode old_mode = LLTexUnit::TAM_WRAP;
if (mTestImageName)
{
- gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTestImageName);
+ gGL.getTexUnit(diffuse_channel)->bindManual(LLTexUnit::TT_TEXTURE, mTestImageName);
if (mIsTransparent)
{
- glColor4f(1.f, 1.f, 1.f, 1.f);
+ gGL.diffuseColor4f(1.f, 1.f, 1.f, 1.f);
}
else
{
- glColor4f(0.7f, 0.6f, 0.3f, 1.f);
- gGL.getTexUnit(0)->setTextureColorBlend(LLTexUnit::TBO_LERP_TEX_ALPHA, LLTexUnit::TBS_TEX_COLOR, LLTexUnit::TBS_PREV_COLOR);
+ gGL.diffuseColor4f(0.7f, 0.6f, 0.3f, 1.f);
+ gGL.getTexUnit(diffuse_channel)->setTextureColorBlend(LLTexUnit::TBO_LERP_TEX_ALPHA, LLTexUnit::TBS_TEX_COLOR, LLTexUnit::TBS_PREV_COLOR);
}
}
else if( !is_dummy && mLayerSet )
{
if( mLayerSet->hasComposite() )
{
- gGL.getTexUnit(0)->bind(mLayerSet->getComposite());
+ gGL.getTexUnit(diffuse_channel)->bind(mLayerSet->getComposite());
}
else
{
- // This warning will always trigger if you've hacked the avatar to show as incomplete.
- // Ignore the warning if that's the case.
- if (!gSavedSettings.getBOOL("RenderUnloadedAvatar"))
- {
- //llwarns << "Layerset without composite" << llendl;
- }
- gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
+ gGL.getTexUnit(diffuse_channel)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
}
}
else
@@ -567,22 +576,25 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
{
old_mode = mTexture->getAddressMode();
}
- gGL.getTexUnit(0)->bind(mTexture.get());
- gGL.getTexUnit(0)->bind(mTexture);
- gGL.getTexUnit(0)->setTextureAddressMode(LLTexUnit::TAM_CLAMP);
+ gGL.getTexUnit(diffuse_channel)->bind(mTexture.get());
+ gGL.getTexUnit(diffuse_channel)->bind(mTexture);
+ gGL.getTexUnit(diffuse_channel)->setTextureAddressMode(LLTexUnit::TAM_CLAMP);
}
else
{
- gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
+ gGL.getTexUnit(diffuse_channel)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT));
}
- mFace->mVertexBuffer->setBuffer(sRenderMask);
+
+ U32 mask = sRenderMask;
U32 start = mMesh->mFaceVertexOffset;
U32 end = start + mMesh->mFaceVertexCount - 1;
U32 count = mMesh->mFaceIndexCount;
U32 offset = mMesh->mFaceIndexOffset;
+ LLVertexBuffer* buff = mFace->getVertexBuffer();
+
if (mMesh->hasWeights())
{
if ((mFace->getPool()->getVertexShaderLevel() > 0))
@@ -591,17 +603,24 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
{
uploadJointMatrices();
}
+ mask = mask | LLVertexBuffer::MAP_WEIGHT;
+ if (mFace->getPool()->getVertexShaderLevel() > 1)
+ {
+ mask = mask | LLVertexBuffer::MAP_CLOTHWEIGHT;
+ }
}
- mFace->mVertexBuffer->drawRange(LLRender::TRIANGLES, start, end, count, offset);
+ buff->setBuffer(mask);
+ buff->drawRange(LLRender::TRIANGLES, start, end, count, offset);
}
else
{
- glPushMatrix();
+ gGL.pushMatrix();
LLMatrix4 jointToWorld = getWorldMatrix();
- glMultMatrixf((GLfloat*)jointToWorld.mMatrix);
- mFace->mVertexBuffer->drawRange(LLRender::TRIANGLES, start, end, count, offset);
- glPopMatrix();
+ gGL.multMatrix((GLfloat*)jointToWorld.mMatrix);
+ buff->setBuffer(mask);
+ buff->drawRange(LLRender::TRIANGLES, start, end, count, offset);
+ gGL.popMatrix();
}
gPipeline.addTrianglesDrawn(count);
@@ -609,13 +628,13 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
if (mTestImageName)
{
- gGL.getTexUnit(0)->setTextureBlendType(LLTexUnit::TB_MULT);
+ gGL.getTexUnit(diffuse_channel)->setTextureBlendType(LLTexUnit::TB_MULT);
}
if (mTexture.notNull() && !is_dummy)
{
- gGL.getTexUnit(0)->bind(mTexture);
- gGL.getTexUnit(0)->setTextureAddressMode(old_mode);
+ gGL.getTexUnit(diffuse_channel)->bind(mTexture);
+ gGL.getTexUnit(diffuse_channel)->setTextureAddressMode(old_mode);
}
return triangle_count;
@@ -626,6 +645,9 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
//-----------------------------------------------------------------------------
void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area)
{
+ //bump num_vertices to next multiple of 4
+ num_vertices = (num_vertices + 0x3) & ~0x3;
+
// Do a pre-alloc pass to determine sizes of data.
if (mMesh && mValid)
{
@@ -648,13 +670,25 @@ static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face");
void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind, bool terse_update)
{
+ //IF THIS FUNCTION BREAKS, SEE LLPOLYMESH CONSTRUCTOR AND CHECK ALIGNMENT OF INPUT ARRAYS
+
mFace = face;
- if (mFace->mVertexBuffer.isNull())
+ if (!mFace->getVertexBuffer())
{
return;
}
+ LLDrawPool *poolp = mFace->getPool();
+ BOOL hardware_skinning = (poolp && poolp->getVertexShaderLevel() > 0) ? TRUE : FALSE;
+
+ if (!hardware_skinning && terse_update)
+ { //no need to do terse updates if we're doing software vertex skinning
+ // since mMesh is being copied into mVertexBuffer every frame
+ return;
+ }
+
+
LLFastTimer t(FTM_AVATAR_FACE);
LLStrider<LLVector3> verticesp;
@@ -667,111 +701,59 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
// Copy data into the faces from the polymesh data.
if (mMesh && mValid)
{
- if (mMesh->getNumVertices())
+ const U32 num_verts = mMesh->getNumVertices();
+
+ if (num_verts)
{
- stop_glerror();
+ face->getVertexBuffer()->getIndexStrider(indicesp);
face->getGeometryAvatar(verticesp, normalsp, tex_coordsp, vertex_weightsp, clothing_weightsp);
- stop_glerror();
- face->mVertexBuffer->getIndexStrider(indicesp);
- stop_glerror();
-
+
verticesp += mMesh->mFaceVertexOffset;
- tex_coordsp += mMesh->mFaceVertexOffset;
normalsp += mMesh->mFaceVertexOffset;
- vertex_weightsp += mMesh->mFaceVertexOffset;
- clothing_weightsp += mMesh->mFaceVertexOffset;
+
+ F32* v = (F32*) verticesp.get();
+ F32* n = (F32*) normalsp.get();
+
+ U32 words = num_verts*4;
+
+ LLVector4a::memcpyNonAliased16(v, (F32*) mMesh->getCoords(), words*sizeof(F32));
+ LLVector4a::memcpyNonAliased16(n, (F32*) mMesh->getNormals(), words*sizeof(F32));
+
+
+ if (!terse_update)
+ {
+ vertex_weightsp += mMesh->mFaceVertexOffset;
+ clothing_weightsp += mMesh->mFaceVertexOffset;
+ tex_coordsp += mMesh->mFaceVertexOffset;
+
+ F32* tc = (F32*) tex_coordsp.get();
+ F32* vw = (F32*) vertex_weightsp.get();
+ F32* cw = (F32*) clothing_weightsp.get();
- const U32* __restrict coords = (U32*) mMesh->getCoords();
- const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords();
- const U32* __restrict normals = (U32*) mMesh->getNormals();
- const U32* __restrict weights = (U32*) mMesh->getWeights();
- const U32* __restrict cloth_weights = (U32*) mMesh->getClothingWeights();
+ LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32));
+ LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32));
+ LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32));
+ }
- const U32 num_verts = mMesh->getNumVertices();
+ const U32 idx_count = mMesh->getNumFaces()*3;
- U32 i = 0;
+ indicesp += mMesh->mFaceIndexOffset;
- const U32 skip = verticesp.getSkip()/sizeof(U32);
+ U16* __restrict idx = indicesp.get();
+ S32* __restrict src_idx = (S32*) mMesh->getFaces();
- U32* __restrict v = (U32*) verticesp.get();
- U32* __restrict n = (U32*) normalsp.get();
-
- if (terse_update)
+ const S32 offset = (S32) mMesh->mFaceVertexOffset;
+
+ for (S32 i = 0; i < idx_count; ++i)
{
- for (S32 i = num_verts; i > 0; --i)
- {
- //morph target application only, only update positions and normals
- v[0] = coords[0];
- v[1] = coords[1];
- v[2] = coords[2];
- coords += 3;
- v += skip;
- }
-
- for (S32 i = num_verts; i > 0; --i)
- {
- n[0] = normals[0];
- n[1] = normals[1];
- n[2] = normals[2];
- normals += 3;
- n += skip;
- }
- }
- else
- {
-
- U32* __restrict tc = (U32*) tex_coordsp.get();
- U32* __restrict vw = (U32*) vertex_weightsp.get();
- U32* __restrict cw = (U32*) clothing_weightsp.get();
-
- do
- {
- v[0] = *(coords++);
- v[1] = *(coords++);
- v[2] = *(coords++);
- v += skip;
-
- tc[0] = *(tex_coords++);
- tc[1] = *(tex_coords++);
- tc += skip;
-
- n[0] = *(normals++);
- n[1] = *(normals++);
- n[2] = *(normals++);
- n += skip;
-
- vw[0] = *(weights++);
- vw += skip;
-
- cw[0] = *(cloth_weights++);
- cw[1] = *(cloth_weights++);
- cw[2] = *(cloth_weights++);
- cw[3] = *(cloth_weights++);
- cw += skip;
- }
- while (++i < num_verts);
-
- const U32 idx_count = mMesh->getNumFaces()*3;
-
- indicesp += mMesh->mFaceIndexOffset;
-
- U16* __restrict idx = indicesp.get();
- S32* __restrict src_idx = (S32*) mMesh->getFaces();
-
- i = 0;
-
- const S32 offset = (S32) mMesh->mFaceVertexOffset;
-
- do
- {
- *(idx++) = *(src_idx++)+offset;
- }
- while (++i < idx_count);
+ *(idx++) = *(src_idx++)+offset;
}
}
}
}
+
+
//-----------------------------------------------------------------------------
// updateLOD()
//-----------------------------------------------------------------------------
@@ -783,155 +765,58 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate)
}
// static
-void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
+void LLViewerJointMesh::updateGeometry(LLFace *mFace, LLPolyMesh *mMesh)
{
LLStrider<LLVector3> o_vertices;
LLStrider<LLVector3> o_normals;
//get vertex and normal striders
- LLVertexBuffer *buffer = mFace->mVertexBuffer;
+ LLVertexBuffer* buffer = mFace->getVertexBuffer();
buffer->getVertexStrider(o_vertices, 0);
buffer->getNormalStrider(o_normals, 0);
- F32 last_weight = F32_MAX;
- LLMatrix4 gBlendMat;
- LLMatrix3 gBlendRotMat;
+ F32* __restrict vert = o_vertices[0].mV;
+ F32* __restrict norm = o_normals[0].mV;
- const F32* weights = mMesh->getWeights();
- const LLVector3* coords = mMesh->getCoords();
- const LLVector3* normals = mMesh->getNormals();
- for (U32 index = 0; index < mMesh->getNumVertices(); index++)
- {
- U32 bidx = index + mMesh->mFaceVertexOffset;
-
- // blend by first matrix
- F32 w = weights[index];
-
- // Maybe we don't have to change gBlendMat.
- // Profiles of a single-avatar scene on a Mac show this to be a very
- // common case. JC
- if (w == last_weight)
- {
- o_vertices[bidx] = coords[index] * gBlendMat;
- o_normals[bidx] = normals[index] * gBlendRotMat;
- continue;
- }
-
- last_weight = w;
+ const F32* __restrict weights = mMesh->getWeights();
+ const LLVector4a* __restrict coords = (LLVector4a*) mMesh->getCoords();
+ const LLVector4a* __restrict normals = (LLVector4a*) mMesh->getNormals();
- S32 joint = llfloor(w);
- w -= joint;
-
- // No lerp required in this case.
- if (w == 1.0f)
- {
- gBlendMat = gJointMatUnaligned[joint+1];
- o_vertices[bidx] = coords[index] * gBlendMat;
- gBlendRotMat = gJointRotUnaligned[joint+1];
- o_normals[bidx] = normals[index] * gBlendRotMat;
- continue;
- }
-
- // Try to keep all the accesses to the matrix data as close
- // together as possible. This function is a hot spot on the
- // Mac. JC
- LLMatrix4 &m0 = gJointMatUnaligned[joint+1];
- LLMatrix4 &m1 = gJointMatUnaligned[joint+0];
-
- gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
- gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
- gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w);
-
- gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w);
- gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w);
- gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w);
-
- gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w);
- gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w);
- gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w);
-
- gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w);
- gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w);
- gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w);
-
- o_vertices[bidx] = coords[index] * gBlendMat;
-
- LLMatrix3 &n0 = gJointRotUnaligned[joint+1];
- LLMatrix3 &n1 = gJointRotUnaligned[joint+0];
-
- gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
- gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
- gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w);
-
- gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w);
- gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w);
- gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w);
-
- gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w);
- gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w);
- gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w);
-
- o_normals[bidx] = normals[index] * gBlendRotMat;
- }
+ U32 offset = mMesh->mFaceVertexOffset*4;
+ vert += offset;
+ norm += offset;
- buffer->setBuffer(0);
-}
+ for (U32 index = 0; index < mMesh->getNumVertices(); index++)
+ {
+ // equivalent to joint = floorf(weights[index]);
+ S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index));
+ F32 w = weights[index] - joint;
-const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow
-const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK;
-static bool sUpdateGeometryCallPointer = false;
-static F64 sUpdateGeometryGlobalTime = 0.0 ;
-static F64 sUpdateGeometryElapsedTime = 0.0 ;
-static F64 sUpdateGeometryElapsedTimeOff = 0.0 ;
-static F64 sUpdateGeometryElapsedTimeOn = 0.0 ;
-static F64 sUpdateGeometryRunAvgOff[10];
-static F64 sUpdateGeometryRunAvgOn[10];
-static U32 sUpdateGeometryRunCount = 0 ;
-static U32 sUpdateGeometryCalls = 0 ;
-static U32 sUpdateGeometryLastProcessor = 0 ;
-static BOOL sVectorizePerfTest = FALSE;
-static U32 sVectorizeProcessor = 0;
-
-//static
-void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh);
-
-//static
-void LLViewerJointMesh::updateVectorize()
-{
- sVectorizePerfTest = gSavedSettings.getBOOL("VectorizePerfTest");
- sVectorizeProcessor = gSavedSettings.getU32("VectorizeProcessor");
- BOOL vectorizeEnable = gSavedSettings.getBOOL("VectorizeEnable");
- BOOL vectorizeSkin = gSavedSettings.getBOOL("VectorizeSkin");
+ LLMatrix4a gBlendMat;
- std::string vp;
- switch(sVectorizeProcessor)
- {
- case 2: vp = "SSE2"; break; // *TODO: replace the magic #s
- case 1: vp = "SSE"; break;
- default: vp = "COMPILER DEFAULT"; break;
- }
- LL_INFOS("AppInit") << "Vectorization : " << ( vectorizeEnable ? "ENABLED" : "DISABLED" ) << LL_ENDL ;
- LL_INFOS("AppInit") << "Vector Processor : " << vp << LL_ENDL ;
- LL_INFOS("AppInit") << "Vectorized Skinning : " << ( vectorizeSkin ? "ENABLED" : "DISABLED" ) << LL_ENDL ;
- if(vectorizeEnable && vectorizeSkin)
- {
- switch(sVectorizeProcessor)
+ if (w != 0.f)
{
- case 2:
- sUpdateGeometryFunc = &updateGeometrySSE2;
- break;
- case 1:
- sUpdateGeometryFunc = &updateGeometrySSE;
- break;
- default:
- sUpdateGeometryFunc = &updateGeometryVectorized;
- break;
+ // blend between matrices and apply
+ gBlendMat.setLerp(gJointMatAligned[joint+0],
+ gJointMatAligned[joint+1], w);
+
+ LLVector4a res;
+ gBlendMat.affineTransform(coords[index], res);
+ res.store4a(vert+index*4);
+ gBlendMat.rotate(normals[index], res);
+ res.store4a(norm+index*4);
+ }
+ else
+ { // No lerp required in this case.
+ LLVector4a res;
+ gJointMatAligned[joint].affineTransform(coords[index], res);
+ res.store4a(vert+index*4);
+ gJointMatAligned[joint].rotate(normals[index], res);
+ res.store4a(norm+index*4);
}
}
- else
- {
- sUpdateGeometryFunc = &updateGeometryOriginal;
- }
+
+ buffer->flush();
}
void LLViewerJointMesh::updateJointGeometry()
@@ -940,135 +825,14 @@ void LLViewerJointMesh::updateJointGeometry()
&& mMesh
&& mFace
&& mMesh->hasWeights()
- && mFace->mVertexBuffer.notNull()
+ && mFace->getVertexBuffer()
&& LLViewerShaderMgr::instance()->getVertexShaderLevel(LLViewerShaderMgr::SHADER_AVATAR) == 0))
{
return;
}
- if (!sVectorizePerfTest)
- {
- // Once we've measured performance, just run the specified
- // code version.
- if(sUpdateGeometryFunc == updateGeometryOriginal)
- uploadJointMatrices();
- sUpdateGeometryFunc(mFace, mMesh);
- }
- else
- {
- // At startup, measure the amount of time in skinning and choose
- // the fastest one.
- LLTimer ug_timer ;
-
- if (sUpdateGeometryCallPointer)
- {
- if(sUpdateGeometryFunc == updateGeometryOriginal)
- uploadJointMatrices();
- // call accelerated version for this processor
- sUpdateGeometryFunc(mFace, mMesh);
- }
- else
- {
- uploadJointMatrices();
- updateGeometryOriginal(mFace, mMesh);
- }
-
- sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64();
- ++sUpdateGeometryCalls;
- if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW))
- {
- F64 time_since_app_start = ug_timer.getElapsedSeconds();
- if(sUpdateGeometryGlobalTime == 0.0
- || sUpdateGeometryLastProcessor != sVectorizeProcessor)
- {
- sUpdateGeometryGlobalTime = time_since_app_start;
- sUpdateGeometryElapsedTime = 0;
- sUpdateGeometryCalls = 0;
- sUpdateGeometryRunCount = 0;
- sUpdateGeometryLastProcessor = sVectorizeProcessor;
- sUpdateGeometryCallPointer = false;
- return;
- }
- F64 percent_time_in_function =
- ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ;
- sUpdateGeometryGlobalTime = time_since_app_start;
- if (!sUpdateGeometryCallPointer)
- {
- // First set of run data is with vectorization off.
- sUpdateGeometryCallPointer = true;
- llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = "
- << "vectorize off " << percent_time_in_function
- << "% of time with "
- << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls)
- << " seconds per call "
- << llendl;
- sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function;
- sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime;
- sUpdateGeometryCalls = 0;
- }
- else
- {
- // Second set of run data is with vectorization on.
- sUpdateGeometryCallPointer = false;
- llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = "
- << "VEC on " << percent_time_in_function
- << "% of time with "
- << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls)
- << " seconds per call "
- << llendl;
- sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ;
- sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime;
-
- sUpdateGeometryCalls = 0;
- sUpdateGeometryRunCount++;
- F64 a = 0.0, b = 0.0;
- for(U32 i = 0; i<sUpdateGeometryRunCount; i++)
- {
- a += sUpdateGeometryRunAvgOff[i];
- b += sUpdateGeometryRunAvgOn[i];
- }
- a /= sUpdateGeometryRunCount;
- b /= sUpdateGeometryRunCount;
- F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn;
- llinfos << "run averages (" << (F64)sUpdateGeometryRunCount
- << "/10) vectorize off " << a
- << "% : vectorize type " << sVectorizeProcessor
- << " " << b
- << "% : performance boost "
- << perf_boost * 100.0
- << "%"
- << llendl ;
- if(sUpdateGeometryRunCount == 10)
- {
- // In case user runs test again, force reset of data on
- // next run.
- sUpdateGeometryGlobalTime = 0.0;
-
- // We have data now on which version is faster. Switch to that
- // code and save the data for next run.
- gSavedSettings.setBOOL("VectorizePerfTest", FALSE);
-
- if (perf_boost > 0.0)
- {
- llinfos << "Vectorization improves avatar skinning performance, "
- << "keeping on for future runs."
- << llendl;
- gSavedSettings.setBOOL("VectorizeSkin", TRUE);
- }
- else
- {
- // SIMD decreases performance, fall back to original code
- llinfos << "Vectorization decreases avatar skinning performance, "
- << "switching back to original code."
- << llendl;
-
- gSavedSettings.setBOOL("VectorizeSkin", FALSE);
- }
- }
- }
- sUpdateGeometryElapsedTime = 0.0f;
- }
- }
+ uploadJointMatrices();
+ updateGeometry(mFace, mMesh);
}
void LLViewerJointMesh::dump()