diff options
Diffstat (limited to 'indra/newview/llviewerjointmesh.cpp')
-rwxr-xr-x[-rw-r--r--] | indra/newview/llviewerjointmesh.cpp | 836 |
1 files changed, 156 insertions, 680 deletions
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index e59e685f53..bf7cf08c63 100644..100755 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -29,7 +29,6 @@ //----------------------------------------------------------------------------- #include "llviewerprecompiledheaders.h" -#include "imageids.h" #include "llfasttimer.h" #include "llrender.h" @@ -42,7 +41,7 @@ #include "llface.h" #include "llgldbg.h" #include "llglheaders.h" -#include "lltexlayer.h" +#include "llviewertexlayer.h" #include "llviewercamera.h" #include "llviewercontrol.h" #include "llviewertexturelist.h" @@ -55,6 +54,7 @@ #include "v4math.h" #include "m3math.h" #include "m4math.h" +#include "llmatrix4a.h" #if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; @@ -62,106 +62,24 @@ extern PFNGLWEIGHTFVARBPROC glWeightfvARB; extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB; #endif -static LLPointer<LLVertexBuffer> sRenderBuffer = NULL; static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL | LLVertexBuffer::MAP_TEXCOORD0; - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -// LLViewerJointMesh::LLSkinJoint -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -//----------------------------------------------------------------------------- -// LLSkinJoint -//----------------------------------------------------------------------------- -LLSkinJoint::LLSkinJoint() -{ - mJoint = NULL; -} - -//----------------------------------------------------------------------------- -// ~LLSkinJoint -//----------------------------------------------------------------------------- -LLSkinJoint::~LLSkinJoint() -{ - mJoint = NULL; -} - - -//----------------------------------------------------------------------------- -// LLSkinJoint::setupSkinJoint() -//----------------------------------------------------------------------------- -BOOL LLSkinJoint::setupSkinJoint( LLViewerJoint *joint) -{ - // find the named joint - mJoint = joint; - if ( !mJoint ) - { - llinfos << "Can't find joint" << llendl; - } - - // compute the inverse root skin matrix - mRootToJointSkinOffset.clearVec(); - - LLVector3 rootSkinOffset; - while (joint) - { - rootSkinOffset += joint->getSkinOffset(); - joint = (LLViewerJoint*)joint->getParent(); - } - - mRootToJointSkinOffset = -rootSkinOffset; - mRootToParentJointSkinOffset = mRootToJointSkinOffset; - mRootToParentJointSkinOffset += mJoint->getSkinOffset(); - - return TRUE; -} - - //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // LLViewerJointMesh //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -BOOL LLViewerJointMesh::sPipelineRender = FALSE; -EAvatarRenderPass LLViewerJointMesh::sRenderPass = AVATAR_RENDER_PASS_SINGLE; -U32 LLViewerJointMesh::sClothingMaskImageName = 0; -LLColor4 LLViewerJointMesh::sClothingInnerColor; //----------------------------------------------------------------------------- // LLViewerJointMesh() //----------------------------------------------------------------------------- LLViewerJointMesh::LLViewerJointMesh() : - mTexture( NULL ), - mLayerSet( NULL ), - mTestImageName( 0 ), - mFaceIndexCount(0), - mIsTransparent(FALSE) + LLAvatarJointMesh() { - - mColor[0] = 1.0f; - mColor[1] = 1.0f; - mColor[2] = 1.0f; - mColor[3] = 1.0f; - mShiny = 0.0f; - mCullBackFaces = TRUE; - - mMesh = NULL; - - mNumSkinJoints = 0; - mSkinJoints = NULL; - - mFace = NULL; - - mMeshID = 0; - mUpdateXform = FALSE; - - mValid = FALSE; } @@ -171,199 +89,6 @@ LLViewerJointMesh::LLViewerJointMesh() //----------------------------------------------------------------------------- LLViewerJointMesh::~LLViewerJointMesh() { - mMesh = NULL; - mTexture = NULL; - freeSkinData(); -} - - -//----------------------------------------------------------------------------- -// LLViewerJointMesh::allocateSkinData() -//----------------------------------------------------------------------------- -BOOL LLViewerJointMesh::allocateSkinData( U32 numSkinJoints ) -{ - mSkinJoints = new LLSkinJoint[ numSkinJoints ]; - mNumSkinJoints = numSkinJoints; - return TRUE; -} - -//----------------------------------------------------------------------------- -// LLViewerJointMesh::freeSkinData() -//----------------------------------------------------------------------------- -void LLViewerJointMesh::freeSkinData() -{ - mNumSkinJoints = 0; - delete [] mSkinJoints; - mSkinJoints = NULL; -} - -//-------------------------------------------------------------------- -// LLViewerJointMesh::getColor() -//-------------------------------------------------------------------- -void LLViewerJointMesh::getColor( F32 *red, F32 *green, F32 *blue, F32 *alpha ) -{ - *red = mColor[0]; - *green = mColor[1]; - *blue = mColor[2]; - *alpha = mColor[3]; -} - -//-------------------------------------------------------------------- -// LLViewerJointMesh::setColor() -//-------------------------------------------------------------------- -void LLViewerJointMesh::setColor( F32 red, F32 green, F32 blue, F32 alpha ) -{ - mColor[0] = red; - mColor[1] = green; - mColor[2] = blue; - mColor[3] = alpha; -} - - -//-------------------------------------------------------------------- -// LLViewerJointMesh::getTexture() -//-------------------------------------------------------------------- -//LLViewerTexture *LLViewerJointMesh::getTexture() -//{ -// return mTexture; -//} - -//-------------------------------------------------------------------- -// LLViewerJointMesh::setTexture() -//-------------------------------------------------------------------- -void LLViewerJointMesh::setTexture( LLViewerTexture *texture ) -{ - mTexture = texture; - - // texture and dynamic_texture are mutually exclusive - if( texture ) - { - mLayerSet = NULL; - //texture->bindTexture(0); - //texture->setClamp(TRUE, TRUE); - } -} - -//-------------------------------------------------------------------- -// LLViewerJointMesh::setLayerSet() -// Sets the shape texture (takes precedence over normal texture) -//-------------------------------------------------------------------- -void LLViewerJointMesh::setLayerSet( LLTexLayerSet* layer_set ) -{ - mLayerSet = layer_set; - - // texture and dynamic_texture are mutually exclusive - if( layer_set ) - { - mTexture = NULL; - } -} - - - -//-------------------------------------------------------------------- -// LLViewerJointMesh::getMesh() -//-------------------------------------------------------------------- -LLPolyMesh *LLViewerJointMesh::getMesh() -{ - return mMesh; -} - -//----------------------------------------------------------------------------- -// LLViewerJointMesh::setMesh() -//----------------------------------------------------------------------------- -void LLViewerJointMesh::setMesh( LLPolyMesh *mesh ) -{ - // set the mesh pointer - mMesh = mesh; - - // release any existing skin joints - freeSkinData(); - - if ( mMesh == NULL ) - { - return; - } - - // acquire the transform from the mesh object - setPosition( mMesh->getPosition() ); - setRotation( mMesh->getRotation() ); - setScale( mMesh->getScale() ); - - // create skin joints if necessary - if ( mMesh->hasWeights() && !mMesh->isLOD()) - { - U32 numJointNames = mMesh->getNumJointNames(); - - allocateSkinData( numJointNames ); - std::string *jointNames = mMesh->getJointNames(); - - U32 jn; - for (jn = 0; jn < numJointNames; jn++) - { - //llinfos << "Setting up joint " << jointNames[jn] << llendl; - LLViewerJoint* joint = (LLViewerJoint*)(getRoot()->findJoint(jointNames[jn]) ); - mSkinJoints[jn].setupSkinJoint( joint ); - } - } - - // setup joint array - if (!mMesh->isLOD()) - { - setupJoint((LLViewerJoint*)getRoot()); - } - -// llinfos << "joint render entries: " << mMesh->mJointRenderData.count() << llendl; -} - -//----------------------------------------------------------------------------- -// setupJoint() -//----------------------------------------------------------------------------- -void LLViewerJointMesh::setupJoint(LLViewerJoint* current_joint) -{ -// llinfos << "Mesh: " << getName() << llendl; - -// S32 joint_count = 0; - U32 sj; - for (sj=0; sj<mNumSkinJoints; sj++) - { - LLSkinJoint &js = mSkinJoints[sj]; - - if (js.mJoint != current_joint) - { - continue; - } - - // we've found a skinjoint for this joint.. - - // is the last joint in the array our parent? - if(mMesh->mJointRenderData.count() && mMesh->mJointRenderData[mMesh->mJointRenderData.count() - 1]->mWorldMatrix == ¤t_joint->getParent()->getWorldMatrix()) - { - // ...then just add ourselves - LLViewerJoint* jointp = js.mJoint; - mMesh->mJointRenderData.put(new LLJointRenderData(&jointp->getWorldMatrix(), &js)); -// llinfos << "joint " << joint_count << js.mJoint->getName() << llendl; -// joint_count++; - } - // otherwise add our parent and ourselves - else - { - mMesh->mJointRenderData.put(new LLJointRenderData(¤t_joint->getParent()->getWorldMatrix(), NULL)); -// llinfos << "joint " << joint_count << current_joint->getParent()->getName() << llendl; -// joint_count++; - mMesh->mJointRenderData.put(new LLJointRenderData(¤t_joint->getWorldMatrix(), &js)); -// llinfos << "joint " << joint_count << current_joint->getName() << llendl; -// joint_count++; - } - } - - // depth-first traversal - for (LLJoint::child_list_t::iterator iter = current_joint->mChildren.begin(); - iter != current_joint->mChildren.end(); ++iter) - { - LLViewerJoint* child_joint = (LLViewerJoint*)(*iter); - setupJoint(child_joint); - } } const S32 NUM_AXES = 3; @@ -375,6 +100,7 @@ const S32 NUM_AXES = 3; // pivot parent 0-n -- child = n+1 static LLMatrix4 gJointMatUnaligned[32]; +static LLMatrix4a gJointMatAligned[32]; static LLMatrix3 gJointRotUnaligned[32]; static LLVector4 gJointPivot[32]; @@ -389,7 +115,7 @@ void LLViewerJointMesh::uploadJointMatrices() BOOL hardware_skinning = (poolp && poolp->getVertexShaderLevel() > 0) ? TRUE : FALSE; //calculate joint matrices - for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) + for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.size(); joint_num++) { LLMatrix4 joint_mat = *reference_mesh->mJointRenderData[joint_num]->mWorldMatrix; @@ -405,7 +131,7 @@ void LLViewerJointMesh::uploadJointMatrices() S32 j = 0; //upload joint pivots - for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) + for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.size(); joint_num++) { LLSkinJoint *sj = reference_mesh->mJointRenderData[joint_num]->mSkinJoint; if (sj) @@ -445,7 +171,7 @@ void LLViewerJointMesh::uploadJointMatrices() GLfloat mat[45*4]; memset(mat, 0, sizeof(GLfloat)*45*4); - for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) + for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.size(); joint_num++) { gJointMatUnaligned[joint_num].transpose(); @@ -457,24 +183,20 @@ void LLViewerJointMesh::uploadJointMatrices() } } stop_glerror(); - glUniform4fvARB(gAvatarMatrixParam, 45, mat); + if (LLGLSLShader::sCurBoundShaderPtr) + { + LLGLSLShader::sCurBoundShaderPtr->uniform4fv(LLViewerShaderMgr::AVATAR_MATRIX, 45, mat); + } stop_glerror(); } -} - -//-------------------------------------------------------------------- -// LLViewerJointMesh::drawBone() -//-------------------------------------------------------------------- -void LLViewerJointMesh::drawBone() -{ -} - -//-------------------------------------------------------------------- -// LLViewerJointMesh::isTransparent() -//-------------------------------------------------------------------- -BOOL LLViewerJointMesh::isTransparent() -{ - return mIsTransparent; + else + { + //load gJointMatUnaligned into gJointMatAligned + for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.size(); ++joint_num) + { + gJointMatAligned[joint_num].loadu(gJointMatUnaligned[joint_num]); + } + } } //-------------------------------------------------------------------- @@ -501,27 +223,30 @@ int compare_int(const void *a, const void *b) U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) { if (!mValid || !mMesh || !mFace || !mVisible || - mFace->mVertexBuffer.isNull() || - mMesh->getNumFaces() == 0) + !mFace->getVertexBuffer() || + mMesh->getNumFaces() == 0 || + (LLGLSLShader::sNoFixedFunction && LLGLSLShader::sCurBoundShaderPtr == NULL)) { return 0; } U32 triangle_count = 0; + S32 diffuse_channel = LLDrawPoolAvatar::sDiffuseChannel; + stop_glerror(); //---------------------------------------------------------------- // setup current color //---------------------------------------------------------------- if (is_dummy) - glColor4fv(LLVOAvatar::getDummyColor().mV); + gGL.diffuseColor4fv(LLVOAvatar::getDummyColor().mV); else - glColor4fv(mColor.mV); + gGL.diffuseColor4fv(mColor.mV); stop_glerror(); - LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0)); + LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), (mFace->getPool()->getVertexShaderLevel() > 0 || LLGLSLShader::sNoFixedFunction) ? 0.f : mShiny); //---------------------------------------------------------------- // setup current texture @@ -529,35 +254,30 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) llassert( !(mTexture.notNull() && mLayerSet) ); // mutually exclusive LLTexUnit::eTextureAddressMode old_mode = LLTexUnit::TAM_WRAP; + LLViewerTexLayerSet *layerset = dynamic_cast<LLViewerTexLayerSet*>(mLayerSet); if (mTestImageName) { - gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTestImageName); + gGL.getTexUnit(diffuse_channel)->bindManual(LLTexUnit::TT_TEXTURE, mTestImageName); if (mIsTransparent) { - glColor4f(1.f, 1.f, 1.f, 1.f); + gGL.diffuseColor4f(1.f, 1.f, 1.f, 1.f); } else { - glColor4f(0.7f, 0.6f, 0.3f, 1.f); - gGL.getTexUnit(0)->setTextureColorBlend(LLTexUnit::TBO_LERP_TEX_ALPHA, LLTexUnit::TBS_TEX_COLOR, LLTexUnit::TBS_PREV_COLOR); + gGL.diffuseColor4f(0.7f, 0.6f, 0.3f, 1.f); + gGL.getTexUnit(diffuse_channel)->setTextureColorBlend(LLTexUnit::TBO_LERP_TEX_ALPHA, LLTexUnit::TBS_TEX_COLOR, LLTexUnit::TBS_PREV_COLOR); } } - else if( !is_dummy && mLayerSet ) + else if( !is_dummy && layerset ) { - if( mLayerSet->hasComposite() ) + if( layerset->hasComposite() ) { - gGL.getTexUnit(0)->bind(mLayerSet->getComposite()); + gGL.getTexUnit(diffuse_channel)->bind(layerset->getViewerComposite()); } else { - // This warning will always trigger if you've hacked the avatar to show as incomplete. - // Ignore the warning if that's the case. - if (!gSavedSettings.getBOOL("RenderUnloadedAvatar")) - { - //llwarns << "Layerset without composite" << llendl; - } - gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); + gGL.getTexUnit(diffuse_channel)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); } } else @@ -567,22 +287,24 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) { old_mode = mTexture->getAddressMode(); } - gGL.getTexUnit(0)->bind(mTexture.get()); - gGL.getTexUnit(0)->bind(mTexture); - gGL.getTexUnit(0)->setTextureAddressMode(LLTexUnit::TAM_CLAMP); + gGL.getTexUnit(diffuse_channel)->bind(mTexture); + gGL.getTexUnit(diffuse_channel)->setTextureAddressMode(LLTexUnit::TAM_CLAMP); } else { - gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); + gGL.getTexUnit(diffuse_channel)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); } - mFace->mVertexBuffer->setBuffer(sRenderMask); + + U32 mask = sRenderMask; U32 start = mMesh->mFaceVertexOffset; U32 end = start + mMesh->mFaceVertexCount - 1; U32 count = mMesh->mFaceIndexCount; U32 offset = mMesh->mFaceIndexOffset; + LLVertexBuffer* buff = mFace->getVertexBuffer(); + if (mMesh->hasWeights()) { if ((mFace->getPool()->getVertexShaderLevel() > 0)) @@ -591,17 +313,24 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) { uploadJointMatrices(); } + mask = mask | LLVertexBuffer::MAP_WEIGHT; + if (mFace->getPool()->getVertexShaderLevel() > 1) + { + mask = mask | LLVertexBuffer::MAP_CLOTHWEIGHT; + } } - mFace->mVertexBuffer->drawRange(LLRender::TRIANGLES, start, end, count, offset); + buff->setBuffer(mask); + buff->drawRange(LLRender::TRIANGLES, start, end, count, offset); } else { - glPushMatrix(); + gGL.pushMatrix(); LLMatrix4 jointToWorld = getWorldMatrix(); - glMultMatrixf((GLfloat*)jointToWorld.mMatrix); - mFace->mVertexBuffer->drawRange(LLRender::TRIANGLES, start, end, count, offset); - glPopMatrix(); + gGL.multMatrix((GLfloat*)jointToWorld.mMatrix); + buff->setBuffer(mask); + buff->drawRange(LLRender::TRIANGLES, start, end, count, offset); + gGL.popMatrix(); } gPipeline.addTrianglesDrawn(count); @@ -609,13 +338,13 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) if (mTestImageName) { - gGL.getTexUnit(0)->setTextureBlendType(LLTexUnit::TB_MULT); + gGL.getTexUnit(diffuse_channel)->setTextureBlendType(LLTexUnit::TB_MULT); } if (mTexture.notNull() && !is_dummy) { - gGL.getTexUnit(0)->bind(mTexture); - gGL.getTexUnit(0)->setTextureAddressMode(old_mode); + gGL.getTexUnit(diffuse_channel)->bind(mTexture); + gGL.getTexUnit(diffuse_channel)->setTextureAddressMode(old_mode); } return triangle_count; @@ -626,6 +355,9 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) //----------------------------------------------------------------------------- void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area) { + //bump num_vertices to next multiple of 4 + num_vertices = (num_vertices + 0x3) & ~0x3; + // Do a pre-alloc pass to determine sizes of data. if (mMesh && mValid) { @@ -644,18 +376,30 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 //----------------------------------------------------------------------------- // updateFaceData() //----------------------------------------------------------------------------- -static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face"); +static LLTrace::BlockTimerStatHandle FTM_AVATAR_FACE("Avatar Face"); void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind, bool terse_update) { + //IF THIS FUNCTION BREAKS, SEE LLPOLYMESH CONSTRUCTOR AND CHECK ALIGNMENT OF INPUT ARRAYS + mFace = face; - if (mFace->mVertexBuffer.isNull()) + if (!mFace->getVertexBuffer()) { return; } - LLFastTimer t(FTM_AVATAR_FACE); + LLDrawPool *poolp = mFace->getPool(); + BOOL hardware_skinning = (poolp && poolp->getVertexShaderLevel() > 0) ? TRUE : FALSE; + + if (!hardware_skinning && terse_update) + { //no need to do terse updates if we're doing software vertex skinning + // since mMesh is being copied into mVertexBuffer every frame + return; + } + + + LL_RECORD_BLOCK_TIME(FTM_AVATAR_FACE); LLStrider<LLVector3> verticesp; LLStrider<LLVector3> normalsp; @@ -667,111 +411,61 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w // Copy data into the faces from the polymesh data. if (mMesh && mValid) { - if (mMesh->getNumVertices()) + const U32 num_verts = mMesh->getNumVertices(); + + if (num_verts) { - stop_glerror(); + face->getVertexBuffer()->getIndexStrider(indicesp); face->getGeometryAvatar(verticesp, normalsp, tex_coordsp, vertex_weightsp, clothing_weightsp); - stop_glerror(); - face->mVertexBuffer->getIndexStrider(indicesp); - stop_glerror(); - + verticesp += mMesh->mFaceVertexOffset; - tex_coordsp += mMesh->mFaceVertexOffset; normalsp += mMesh->mFaceVertexOffset; - vertex_weightsp += mMesh->mFaceVertexOffset; - clothing_weightsp += mMesh->mFaceVertexOffset; + + F32* v = (F32*) verticesp.get(); + F32* n = (F32*) normalsp.get(); + + U32 words = num_verts*4; + + LLVector4a::memcpyNonAliased16(v, (F32*) mMesh->getCoords(), words*sizeof(F32)); + LLVector4a::memcpyNonAliased16(n, (F32*) mMesh->getNormals(), words*sizeof(F32)); + + + if (!terse_update) + { + vertex_weightsp += mMesh->mFaceVertexOffset; + clothing_weightsp += mMesh->mFaceVertexOffset; + tex_coordsp += mMesh->mFaceVertexOffset; + + F32* tc = (F32*) tex_coordsp.get(); + F32* vw = (F32*) vertex_weightsp.get(); + F32* cw = (F32*) clothing_weightsp.get(); + + S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size); + S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF; + LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size); + LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32)); + } - const U32* __restrict coords = (U32*) mMesh->getCoords(); - const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords(); - const U32* __restrict normals = (U32*) mMesh->getNormals(); - const U32* __restrict weights = (U32*) mMesh->getWeights(); - const U32* __restrict cloth_weights = (U32*) mMesh->getClothingWeights(); + const U32 idx_count = mMesh->getNumFaces()*3; - const U32 num_verts = mMesh->getNumVertices(); + indicesp += mMesh->mFaceIndexOffset; - U32 i = 0; + U16* __restrict idx = indicesp.get(); + S32* __restrict src_idx = (S32*) mMesh->getFaces(); - const U32 skip = verticesp.getSkip()/sizeof(U32); + const S32 offset = (S32) mMesh->mFaceVertexOffset; - U32* __restrict v = (U32*) verticesp.get(); - U32* __restrict n = (U32*) normalsp.get(); - - if (terse_update) + for (S32 i = 0; i < idx_count; ++i) { - for (S32 i = num_verts; i > 0; --i) - { - //morph target application only, only update positions and normals - v[0] = coords[0]; - v[1] = coords[1]; - v[2] = coords[2]; - coords += 3; - v += skip; - } - - for (S32 i = num_verts; i > 0; --i) - { - n[0] = normals[0]; - n[1] = normals[1]; - n[2] = normals[2]; - normals += 3; - n += skip; - } - } - else - { - - U32* __restrict tc = (U32*) tex_coordsp.get(); - U32* __restrict vw = (U32*) vertex_weightsp.get(); - U32* __restrict cw = (U32*) clothing_weightsp.get(); - - do - { - v[0] = *(coords++); - v[1] = *(coords++); - v[2] = *(coords++); - v += skip; - - tc[0] = *(tex_coords++); - tc[1] = *(tex_coords++); - tc += skip; - - n[0] = *(normals++); - n[1] = *(normals++); - n[2] = *(normals++); - n += skip; - - vw[0] = *(weights++); - vw += skip; - - cw[0] = *(cloth_weights++); - cw[1] = *(cloth_weights++); - cw[2] = *(cloth_weights++); - cw[3] = *(cloth_weights++); - cw += skip; - } - while (++i < num_verts); - - const U32 idx_count = mMesh->getNumFaces()*3; - - indicesp += mMesh->mFaceIndexOffset; - - U16* __restrict idx = indicesp.get(); - S32* __restrict src_idx = (S32*) mMesh->getFaces(); - - i = 0; - - const S32 offset = (S32) mMesh->mFaceVertexOffset; - - do - { - *(idx++) = *(src_idx++)+offset; - } - while (++i < idx_count); + *(idx++) = *(src_idx++)+offset; } } } } + + //----------------------------------------------------------------------------- // updateLOD() //----------------------------------------------------------------------------- @@ -783,155 +477,58 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate) } // static -void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) +void LLViewerJointMesh::updateGeometry(LLFace *mFace, LLPolyMesh *mMesh) { LLStrider<LLVector3> o_vertices; LLStrider<LLVector3> o_normals; //get vertex and normal striders - LLVertexBuffer *buffer = mFace->mVertexBuffer; + LLVertexBuffer* buffer = mFace->getVertexBuffer(); buffer->getVertexStrider(o_vertices, 0); buffer->getNormalStrider(o_normals, 0); - F32 last_weight = F32_MAX; - LLMatrix4 gBlendMat; - LLMatrix3 gBlendRotMat; - - const F32* weights = mMesh->getWeights(); - const LLVector3* coords = mMesh->getCoords(); - const LLVector3* normals = mMesh->getNormals(); - for (U32 index = 0; index < mMesh->getNumVertices(); index++) - { - U32 bidx = index + mMesh->mFaceVertexOffset; - - // blend by first matrix - F32 w = weights[index]; - - // Maybe we don't have to change gBlendMat. - // Profiles of a single-avatar scene on a Mac show this to be a very - // common case. JC - if (w == last_weight) - { - o_vertices[bidx] = coords[index] * gBlendMat; - o_normals[bidx] = normals[index] * gBlendRotMat; - continue; - } - - last_weight = w; - - S32 joint = llfloor(w); - w -= joint; - - // No lerp required in this case. - if (w == 1.0f) - { - gBlendMat = gJointMatUnaligned[joint+1]; - o_vertices[bidx] = coords[index] * gBlendMat; - gBlendRotMat = gJointRotUnaligned[joint+1]; - o_normals[bidx] = normals[index] * gBlendRotMat; - continue; - } - - // Try to keep all the accesses to the matrix data as close - // together as possible. This function is a hot spot on the - // Mac. JC - LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; - LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; - - gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); - gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); - gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); + F32* __restrict vert = o_vertices[0].mV; + F32* __restrict norm = o_normals[0].mV; - gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); - gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); - gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); + const F32* __restrict weights = mMesh->getWeights(); + const LLVector4a* __restrict coords = (LLVector4a*) mMesh->getCoords(); + const LLVector4a* __restrict normals = (LLVector4a*) mMesh->getNormals(); - gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); - gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); - gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); + U32 offset = mMesh->mFaceVertexOffset*4; + vert += offset; + norm += offset; - gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); - gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); - gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); - - o_vertices[bidx] = coords[index] * gBlendMat; - - LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; - LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; - - gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); - gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); - gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); - - gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); - gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); - gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); - - gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); - gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); - gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); - - o_normals[bidx] = normals[index] * gBlendRotMat; - } + for (U32 index = 0; index < mMesh->getNumVertices(); index++) + { + // equivalent to joint = floorf(weights[index]); + S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index)); + F32 w = weights[index] - joint; - buffer->setBuffer(0); -} + LLMatrix4a gBlendMat; -const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow -const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK; -static bool sUpdateGeometryCallPointer = false; -static F64 sUpdateGeometryGlobalTime = 0.0 ; -static F64 sUpdateGeometryElapsedTime = 0.0 ; -static F64 sUpdateGeometryElapsedTimeOff = 0.0 ; -static F64 sUpdateGeometryElapsedTimeOn = 0.0 ; -static F64 sUpdateGeometryRunAvgOff[10]; -static F64 sUpdateGeometryRunAvgOn[10]; -static U32 sUpdateGeometryRunCount = 0 ; -static U32 sUpdateGeometryCalls = 0 ; -static U32 sUpdateGeometryLastProcessor = 0 ; -static BOOL sVectorizePerfTest = FALSE; -static U32 sVectorizeProcessor = 0; - -//static -void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh); - -//static -void LLViewerJointMesh::updateVectorize() -{ - sVectorizePerfTest = gSavedSettings.getBOOL("VectorizePerfTest"); - sVectorizeProcessor = gSavedSettings.getU32("VectorizeProcessor"); - BOOL vectorizeEnable = gSavedSettings.getBOOL("VectorizeEnable"); - BOOL vectorizeSkin = gSavedSettings.getBOOL("VectorizeSkin"); - - std::string vp; - switch(sVectorizeProcessor) - { - case 2: vp = "SSE2"; break; // *TODO: replace the magic #s - case 1: vp = "SSE"; break; - default: vp = "COMPILER DEFAULT"; break; - } - LL_INFOS("AppInit") << "Vectorization : " << ( vectorizeEnable ? "ENABLED" : "DISABLED" ) << LL_ENDL ; - LL_INFOS("AppInit") << "Vector Processor : " << vp << LL_ENDL ; - LL_INFOS("AppInit") << "Vectorized Skinning : " << ( vectorizeSkin ? "ENABLED" : "DISABLED" ) << LL_ENDL ; - if(vectorizeEnable && vectorizeSkin) - { - switch(sVectorizeProcessor) + if (w != 0.f) { - case 2: - sUpdateGeometryFunc = &updateGeometrySSE2; - break; - case 1: - sUpdateGeometryFunc = &updateGeometrySSE; - break; - default: - sUpdateGeometryFunc = &updateGeometryVectorized; - break; + // blend between matrices and apply + gBlendMat.setLerp(gJointMatAligned[joint+0], + gJointMatAligned[joint+1], w); + + LLVector4a res; + gBlendMat.affineTransform(coords[index], res); + res.store4a(vert+index*4); + gBlendMat.rotate(normals[index], res); + res.store4a(norm+index*4); + } + else + { // No lerp required in this case. + LLVector4a res; + gJointMatAligned[joint].affineTransform(coords[index], res); + res.store4a(vert+index*4); + gJointMatAligned[joint].rotate(normals[index], res); + res.store4a(norm+index*4); } } - else - { - sUpdateGeometryFunc = &updateGeometryOriginal; - } + + buffer->flush(); } void LLViewerJointMesh::updateJointGeometry() @@ -940,142 +537,21 @@ void LLViewerJointMesh::updateJointGeometry() && mMesh && mFace && mMesh->hasWeights() - && mFace->mVertexBuffer.notNull() + && mFace->getVertexBuffer() && LLViewerShaderMgr::instance()->getVertexShaderLevel(LLViewerShaderMgr::SHADER_AVATAR) == 0)) { return; } - if (!sVectorizePerfTest) - { - // Once we've measured performance, just run the specified - // code version. - if(sUpdateGeometryFunc == updateGeometryOriginal) - uploadJointMatrices(); - sUpdateGeometryFunc(mFace, mMesh); - } - else - { - // At startup, measure the amount of time in skinning and choose - // the fastest one. - LLTimer ug_timer ; - - if (sUpdateGeometryCallPointer) - { - if(sUpdateGeometryFunc == updateGeometryOriginal) - uploadJointMatrices(); - // call accelerated version for this processor - sUpdateGeometryFunc(mFace, mMesh); - } - else - { - uploadJointMatrices(); - updateGeometryOriginal(mFace, mMesh); - } - - sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64(); - ++sUpdateGeometryCalls; - if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW)) - { - F64 time_since_app_start = ug_timer.getElapsedSeconds(); - if(sUpdateGeometryGlobalTime == 0.0 - || sUpdateGeometryLastProcessor != sVectorizeProcessor) - { - sUpdateGeometryGlobalTime = time_since_app_start; - sUpdateGeometryElapsedTime = 0; - sUpdateGeometryCalls = 0; - sUpdateGeometryRunCount = 0; - sUpdateGeometryLastProcessor = sVectorizeProcessor; - sUpdateGeometryCallPointer = false; - return; - } - F64 percent_time_in_function = - ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ; - sUpdateGeometryGlobalTime = time_since_app_start; - if (!sUpdateGeometryCallPointer) - { - // First set of run data is with vectorization off. - sUpdateGeometryCallPointer = true; - llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " - << "vectorize off " << percent_time_in_function - << "% of time with " - << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) - << " seconds per call " - << llendl; - sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function; - sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime; - sUpdateGeometryCalls = 0; - } - else - { - // Second set of run data is with vectorization on. - sUpdateGeometryCallPointer = false; - llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " - << "VEC on " << percent_time_in_function - << "% of time with " - << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) - << " seconds per call " - << llendl; - sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ; - sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime; - - sUpdateGeometryCalls = 0; - sUpdateGeometryRunCount++; - F64 a = 0.0, b = 0.0; - for(U32 i = 0; i<sUpdateGeometryRunCount; i++) - { - a += sUpdateGeometryRunAvgOff[i]; - b += sUpdateGeometryRunAvgOn[i]; - } - a /= sUpdateGeometryRunCount; - b /= sUpdateGeometryRunCount; - F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn; - llinfos << "run averages (" << (F64)sUpdateGeometryRunCount - << "/10) vectorize off " << a - << "% : vectorize type " << sVectorizeProcessor - << " " << b - << "% : performance boost " - << perf_boost * 100.0 - << "%" - << llendl ; - if(sUpdateGeometryRunCount == 10) - { - // In case user runs test again, force reset of data on - // next run. - sUpdateGeometryGlobalTime = 0.0; - - // We have data now on which version is faster. Switch to that - // code and save the data for next run. - gSavedSettings.setBOOL("VectorizePerfTest", FALSE); - - if (perf_boost > 0.0) - { - llinfos << "Vectorization improves avatar skinning performance, " - << "keeping on for future runs." - << llendl; - gSavedSettings.setBOOL("VectorizeSkin", TRUE); - } - else - { - // SIMD decreases performance, fall back to original code - llinfos << "Vectorization decreases avatar skinning performance, " - << "switching back to original code." - << llendl; - - gSavedSettings.setBOOL("VectorizeSkin", FALSE); - } - } - } - sUpdateGeometryElapsedTime = 0.0f; - } - } + uploadJointMatrices(); + updateGeometry(mFace, mMesh); } void LLViewerJointMesh::dump() { if (mValid) { - llinfos << "Usable LOD " << mName << llendl; + LL_INFOS() << "Usable LOD " << mName << LL_ENDL; } } |