From 095a5e84408b47ef3c5610e111aefe51d77633ca Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Sat, 6 Feb 2010 17:33:12 -0600 Subject: Draw prims using triangle strips instead of triangle lists. --- indra/newview/llviewerjointmesh.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index 1a67fc0966..7225aa1523 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -626,7 +626,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) mFace->mVertexBuffer->drawRange(LLRender::TRIANGLES, start, end, count, offset); glPopMatrix(); } - gPipeline.addTrianglesDrawn(count/3); + gPipeline.addTrianglesDrawn(count); triangle_count += count; -- cgit v1.2.3 From 38158f0e14663ac73c7ed79723ba6e34a1253e2a Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 18 Feb 2010 23:04:16 -0600 Subject: Model preview now loads materials. --- indra/newview/llviewerjointmesh.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index 7225aa1523..92029d10f6 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -561,12 +561,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) } else { - // This warning will always trigger if you've hacked the avatar to show as incomplete. - // Ignore the warning if that's the case. - if (!gSavedSettings.getBOOL("RenderUnloadedAvatar")) - { - //llwarns << "Layerset without composite" << llendl; - } gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); } } -- cgit v1.2.3 From 71b0a63c8df29d5d69b777306dcf6280fd98886a Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 16 Apr 2010 13:00:01 -0500 Subject: Optimize LLViewerJointMesh::updateFaceData --- indra/newview/llviewerjointmesh.cpp | 88 +++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 19 deletions(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index 92029d10f6..90a5a29bb4 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -661,6 +661,8 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 //----------------------------------------------------------------------------- // updateFaceData() //----------------------------------------------------------------------------- +static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face"); + void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind) { mFace = face; @@ -670,6 +672,8 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w return; } + LLFastTimer t(FTM_AVATAR_FACE); + LLStrider verticesp; LLStrider normalsp; LLStrider tex_coordsp; @@ -688,30 +692,76 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w face->mVertexBuffer->getIndexStrider(indicesp); stop_glerror(); - for (U16 i = 0; i < mMesh->getNumVertices(); i++) + verticesp += mMesh->mFaceVertexOffset; + tex_coordsp += mMesh->mFaceVertexOffset; + normalsp += mMesh->mFaceVertexOffset; + vertex_weightsp += mMesh->mFaceVertexOffset; + clothing_weightsp += mMesh->mFaceVertexOffset; + + U32* __restrict v = (U32*) verticesp.get(); + const U32 vert_skip = verticesp.getSkip()/sizeof(U32); + + U32* __restrict tc = (U32*) tex_coordsp.get(); + const U32 tc_skip = tex_coordsp.getSkip()/sizeof(U32); + + U32* __restrict n = (U32*) normalsp.get(); + const U32 n_skip = normalsp.getSkip()/sizeof(U32); + + U32* __restrict vw = (U32*) vertex_weightsp.get(); + const U32 vw_skip = vertex_weightsp.getSkip()/sizeof(U32); + + + U32* __restrict cw = (U32*) clothing_weightsp.get(); + const U32 cw_skip = vertex_weightsp.getSkip()/sizeof(U32); + + const U32* __restrict coords = (U32*) mMesh->getCoords(); + const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords(); + const U32* __restrict normals = (U32*) mMesh->getNormals(); + const U32* __restrict weights = (U32*) mMesh->getWeights(); + const U32* __restrict cloth_weights = (U32*) mMesh->getClothingWeights(); + + const U32 num_verts = mMesh->getNumVertices(); + + U32 i = 0; + do { - verticesp[mMesh->mFaceVertexOffset + i] = *(mMesh->getCoords() + i); - tex_coordsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getTexCoords() + i); - normalsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getNormals() + i); - vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i); - if (damp_wind) - { - clothing_weightsp[mMesh->mFaceVertexOffset + i] = LLVector4(0,0,0,0); - } - else - { - clothing_weightsp[mMesh->mFaceVertexOffset + i] = (*(mMesh->getClothingWeights() + i)); - } + v[0] = *(coords++); + v[1] = *(coords++); + v[2] = *(coords++); + v += vert_skip; + + tc[0] = *(tex_coords++); + tc[1] = *(tex_coords++); + tc += tc_skip; + + n[0] = *(normals++); + n[1] = *(normals++); + n[2] = *(normals++); + n += n_skip; + + vw[0] = *(weights++); + vw += vw_skip; + + cw[0] = *(cloth_weights++); + cw[1] = *(cloth_weights++); + cw[2] = *(cloth_weights++); + cw[3] = *(cloth_weights++); + cw += cw_skip; } + while (++i < num_verts); + + const U32 idx_count = mMesh->getNumFaces()*3; - for (S32 i = 0; i < mMesh->getNumFaces(); i++) + U16* __restrict idx = indicesp.get(); + S32* __restrict src_idx = (S32*) mMesh->getFaces(); + + i = 0; + + do { - for (U32 j = 0; j < 3; j++) - { - U32 k = i*3+j+mMesh->mFaceIndexOffset; - indicesp[k] = mMesh->getFaces()[i][j] + mMesh->mFaceVertexOffset; - } + *(idx++) = *(src_idx++); } + while (++i < idx_count); } } } -- cgit v1.2.3 From 12499cebcba81175ae5d92926f5ec89632f00926 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 16 Apr 2010 15:53:26 -0500 Subject: Fix for busted optimizations. --- indra/newview/llviewerjointmesh.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index 90a5a29bb4..db2279d925 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -752,14 +752,18 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w const U32 idx_count = mMesh->getNumFaces()*3; + indicesp += mMesh->mFaceIndexOffset; + U16* __restrict idx = indicesp.get(); S32* __restrict src_idx = (S32*) mMesh->getFaces(); i = 0; + const S32 offset = (S32) mMesh->mFaceVertexOffset; + do { - *(idx++) = *(src_idx++); + *(idx++) = *(src_idx++)+offset; } while (++i < idx_count); } -- cgit v1.2.3 From e994b9dcee82b510dc881e2b14d053a27fe35472 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Mon, 19 Apr 2010 23:33:34 -0500 Subject: Remove foot shadows from llvoavatar and add terse update to LLViewerJointMesh::updateFaceData. --- indra/newview/llviewerjointmesh.cpp | 121 +++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 51 deletions(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index db2279d925..fb6cc8d790 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -663,7 +663,7 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 //----------------------------------------------------------------------------- static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face"); -void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind) +void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind, bool terse_update) { mFace = face; @@ -698,22 +698,6 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w vertex_weightsp += mMesh->mFaceVertexOffset; clothing_weightsp += mMesh->mFaceVertexOffset; - U32* __restrict v = (U32*) verticesp.get(); - const U32 vert_skip = verticesp.getSkip()/sizeof(U32); - - U32* __restrict tc = (U32*) tex_coordsp.get(); - const U32 tc_skip = tex_coordsp.getSkip()/sizeof(U32); - - U32* __restrict n = (U32*) normalsp.get(); - const U32 n_skip = normalsp.getSkip()/sizeof(U32); - - U32* __restrict vw = (U32*) vertex_weightsp.get(); - const U32 vw_skip = vertex_weightsp.getSkip()/sizeof(U32); - - - U32* __restrict cw = (U32*) clothing_weightsp.get(); - const U32 cw_skip = vertex_weightsp.getSkip()/sizeof(U32); - const U32* __restrict coords = (U32*) mMesh->getCoords(); const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords(); const U32* __restrict normals = (U32*) mMesh->getNormals(); @@ -723,49 +707,84 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w const U32 num_verts = mMesh->getNumVertices(); U32 i = 0; - do + + const U32 skip = verticesp.getSkip()/sizeof(U32); + + U32* __restrict v = (U32*) verticesp.get(); + U32* __restrict n = (U32*) normalsp.get(); + + if (terse_update) { - v[0] = *(coords++); - v[1] = *(coords++); - v[2] = *(coords++); - v += vert_skip; - - tc[0] = *(tex_coords++); - tc[1] = *(tex_coords++); - tc += tc_skip; - - n[0] = *(normals++); - n[1] = *(normals++); - n[2] = *(normals++); - n += n_skip; - - vw[0] = *(weights++); - vw += vw_skip; - - cw[0] = *(cloth_weights++); - cw[1] = *(cloth_weights++); - cw[2] = *(cloth_weights++); - cw[3] = *(cloth_weights++); - cw += cw_skip; + for (S32 i = num_verts; i > 0; --i) + { + //morph target application only, only update positions and normals + v[0] = coords[0]; + v[1] = coords[1]; + v[2] = coords[2]; + coords += 3; + v += skip; + } + + for (S32 i = num_verts; i > 0; --i) + { + n[0] = normals[0]; + n[1] = normals[1]; + n[2] = normals[2]; + normals += 3; + n += skip; + } } - while (++i < num_verts); + else + { - const U32 idx_count = mMesh->getNumFaces()*3; + U32* __restrict tc = (U32*) tex_coordsp.get(); + U32* __restrict vw = (U32*) vertex_weightsp.get(); + U32* __restrict cw = (U32*) clothing_weightsp.get(); + + do + { + v[0] = *(coords++); + v[1] = *(coords++); + v[2] = *(coords++); + v += skip; + + tc[0] = *(tex_coords++); + tc[1] = *(tex_coords++); + tc += skip; + + n[0] = *(normals++); + n[1] = *(normals++); + n[2] = *(normals++); + n += skip; + + vw[0] = *(weights++); + vw += skip; + + cw[0] = *(cloth_weights++); + cw[1] = *(cloth_weights++); + cw[2] = *(cloth_weights++); + cw[3] = *(cloth_weights++); + cw += skip; + } + while (++i < num_verts); - indicesp += mMesh->mFaceIndexOffset; + const U32 idx_count = mMesh->getNumFaces()*3; - U16* __restrict idx = indicesp.get(); - S32* __restrict src_idx = (S32*) mMesh->getFaces(); + indicesp += mMesh->mFaceIndexOffset; - i = 0; + U16* __restrict idx = indicesp.get(); + S32* __restrict src_idx = (S32*) mMesh->getFaces(); - const S32 offset = (S32) mMesh->mFaceVertexOffset; + i = 0; - do - { - *(idx++) = *(src_idx++)+offset; + const S32 offset = (S32) mMesh->mFaceVertexOffset; + + do + { + *(idx++) = *(src_idx++)+offset; + } + while (++i < idx_count); } - while (++i < idx_count); } } } -- cgit v1.2.3 From 49579bebdd274a88c2381c4cab3d09ecd393564d Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Mon, 10 May 2010 13:00:36 -0500 Subject: Fix for wierd triangle shadow bug and fix for ATI hating deferred rendering. --- indra/newview/llviewerjointmesh.cpp | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index fb6cc8d790..deb3d8fd97 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -516,6 +516,8 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) U32 triangle_count = 0; + S32 diffuse_channel = LLDrawPoolAvatar::sDiffuseChannel; + stop_glerror(); //---------------------------------------------------------------- @@ -541,7 +543,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) LLTexUnit::eTextureAddressMode old_mode = LLTexUnit::TAM_WRAP; if (mTestImageName) { - gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTestImageName); + gGL.getTexUnit(diffuse_channel)->bindManual(LLTexUnit::TT_TEXTURE, mTestImageName); if (mIsTransparent) { @@ -550,18 +552,18 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) else { glColor4f(0.7f, 0.6f, 0.3f, 1.f); - gGL.getTexUnit(0)->setTextureColorBlend(LLTexUnit::TBO_LERP_TEX_ALPHA, LLTexUnit::TBS_TEX_COLOR, LLTexUnit::TBS_PREV_COLOR); + gGL.getTexUnit(diffuse_channel)->setTextureColorBlend(LLTexUnit::TBO_LERP_TEX_ALPHA, LLTexUnit::TBS_TEX_COLOR, LLTexUnit::TBS_PREV_COLOR); } } else if( !is_dummy && mLayerSet ) { if( mLayerSet->hasComposite() ) { - gGL.getTexUnit(0)->bind(mLayerSet->getComposite()); + gGL.getTexUnit(diffuse_channel)->bind(mLayerSet->getComposite()); } else { - gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); + gGL.getTexUnit(diffuse_channel)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); } } else @@ -571,25 +573,25 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) { old_mode = mTexture->getAddressMode(); } - gGL.getTexUnit(0)->bind(mTexture.get()); - gGL.getTexUnit(0)->bind(mTexture); - gGL.getTexUnit(0)->setTextureAddressMode(LLTexUnit::TAM_CLAMP); + gGL.getTexUnit(diffuse_channel)->bind(mTexture.get()); + gGL.getTexUnit(diffuse_channel)->bind(mTexture); + gGL.getTexUnit(diffuse_channel)->setTextureAddressMode(LLTexUnit::TAM_CLAMP); } else { - gGL.getTexUnit(0)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); + gGL.getTexUnit(diffuse_channel)->bind(LLViewerTextureManager::getFetchedTexture(IMG_DEFAULT)); } if (gRenderForSelect) { if (isTransparent()) { - gGL.getTexUnit(0)->setTextureColorBlend(LLTexUnit::TBO_REPLACE, LLTexUnit::TBS_PREV_COLOR); - gGL.getTexUnit(0)->setTextureAlphaBlend(LLTexUnit::TBO_MULT, LLTexUnit::TBS_TEX_ALPHA, LLTexUnit::TBS_CONST_ALPHA); + gGL.getTexUnit(diffuse_channel)->setTextureColorBlend(LLTexUnit::TBO_REPLACE, LLTexUnit::TBS_PREV_COLOR); + gGL.getTexUnit(diffuse_channel)->setTextureAlphaBlend(LLTexUnit::TBO_MULT, LLTexUnit::TBS_TEX_ALPHA, LLTexUnit::TBS_CONST_ALPHA); } else { - gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE); + gGL.getTexUnit(diffuse_channel)->unbind(LLTexUnit::TT_TEXTURE); } } @@ -626,13 +628,13 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) if (mTestImageName) { - gGL.getTexUnit(0)->setTextureBlendType(LLTexUnit::TB_MULT); + gGL.getTexUnit(diffuse_channel)->setTextureBlendType(LLTexUnit::TB_MULT); } if (mTexture.notNull() && !is_dummy) { - gGL.getTexUnit(0)->bind(mTexture); - gGL.getTexUnit(0)->setTextureAddressMode(old_mode); + gGL.getTexUnit(diffuse_channel)->bind(mTexture); + gGL.getTexUnit(diffuse_channel)->setTextureAddressMode(old_mode); } return triangle_count; -- cgit v1.2.3 From f14215689244a65064158e475e4f41eb149d85b0 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 20 May 2010 02:22:26 -0500 Subject: Vectorized avatar vertex skinning. --- indra/newview/llviewerjointmesh.cpp | 145 +++++++++++++++++------------------- 1 file changed, 70 insertions(+), 75 deletions(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index deb3d8fd97..294dfdcb55 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -61,6 +61,7 @@ #include "v4math.h" #include "m3math.h" #include "m4math.h" +#include "llmatrix4a.h" #if !LL_DARWIN && !LL_LINUX && !LL_SOLARIS extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; @@ -382,6 +383,7 @@ const S32 NUM_AXES = 3; // pivot parent 0-n -- child = n+1 static LLMatrix4 gJointMatUnaligned[32]; +static LLMatrix4a gJointMatAligned[32]; static LLMatrix3 gJointRotUnaligned[32]; static LLVector4 gJointPivot[32]; @@ -467,6 +469,14 @@ void LLViewerJointMesh::uploadJointMatrices() glUniform4fvARB(gAvatarMatrixParam, 45, mat); stop_glerror(); } + else + { + //load gJointMatUnaligned into gJointMatAligned + for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); ++joint_num) + { + gJointMatAligned[joint_num].loadu(gJointMatUnaligned[joint_num]); + } + } } //-------------------------------------------------------------------- @@ -723,7 +733,7 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w v[0] = coords[0]; v[1] = coords[1]; v[2] = coords[2]; - coords += 3; + coords += 4; v += skip; } @@ -732,12 +742,12 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w n[0] = normals[0]; n[1] = normals[1]; n[2] = normals[2]; - normals += 3; + normals += 4; n += skip; } } else - { + { U32* __restrict tc = (U32*) tex_coordsp.get(); U32* __restrict vw = (U32*) vertex_weightsp.get(); @@ -745,18 +755,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w do { - v[0] = *(coords++); - v[1] = *(coords++); - v[2] = *(coords++); + v[0] = coords[0]; + v[1] = coords[1]; + v[2] = coords[2]; + coords += 4; v += skip; tc[0] = *(tex_coords++); tc[1] = *(tex_coords++); tc += skip; - n[0] = *(normals++); - n[1] = *(normals++); - n[2] = *(normals++); + n[0] = normals[0]; + n[1] = normals[1]; + n[2] = normals[2]; + normals += 4; n += skip; vw[0] = *(weights++); @@ -808,17 +820,17 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) LLStrider o_normals; //get vertex and normal striders - LLVertexBuffer *buffer = mFace->mVertexBuffer; + LLVertexBuffer* buffer = mFace->mVertexBuffer; buffer->getVertexStrider(o_vertices, 0); buffer->getNormalStrider(o_normals, 0); - F32 last_weight = F32_MAX; - LLMatrix4 gBlendMat; - LLMatrix3 gBlendRotMat; + //F32 last_weight = F32_MAX; + LLMatrix4a gBlendMat; + + __restrict const F32* weights = mMesh->getWeights(); + __restrict const LLVector4* coords = mMesh->getCoords(); + __restrict const LLVector4* normals = mMesh->getNormals(); - const F32* weights = mMesh->getWeights(); - const LLVector3* coords = mMesh->getCoords(); - const LLVector3* normals = mMesh->getNormals(); for (U32 index = 0; index < mMesh->getNumVertices(); index++) { U32 bidx = index + mMesh->mFaceVertexOffset; @@ -826,71 +838,54 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) // blend by first matrix F32 w = weights[index]; + LLVector4a coord; + coord.load4a(coords[index].mV); + + LLVector4a norm; + norm.load4a(normals[index].mV); + // Maybe we don't have to change gBlendMat. // Profiles of a single-avatar scene on a Mac show this to be a very // common case. JC - if (w == last_weight) + //if (w != last_weight) { - o_vertices[bidx] = coords[index] * gBlendMat; - o_normals[bidx] = normals[index] * gBlendRotMat; - continue; - } - - last_weight = w; + //last_weight = w; - S32 joint = llfloor(w); - w -= joint; - - // No lerp required in this case. - if (w == 1.0f) - { - gBlendMat = gJointMatUnaligned[joint+1]; - o_vertices[bidx] = coords[index] * gBlendMat; - gBlendRotMat = gJointRotUnaligned[joint+1]; - o_normals[bidx] = normals[index] * gBlendRotMat; - continue; + S32 joint = llfloor(w); + w -= joint; + + + if (w >= 0.f) + { + // Try to keep all the accesses to the matrix data as close + // together as possible. This function is a hot spot on the + // Mac. JC + gBlendMat.setLerp(gJointMatAligned[joint+0], + gJointMatAligned[joint+1], w); + + LLVector4a res; + gBlendMat.affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gBlendMat.rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + } + else + { // No lerp required in this case. + LLVector4a res; + gJointMatAligned[joint].affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gJointMatAligned[joint].rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + } } - - // Try to keep all the accesses to the matrix data as close - // together as possible. This function is a hot spot on the - // Mac. JC - LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; - LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; - - gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); - gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); - gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); - - gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); - gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); - gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); - - gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); - gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); - gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); - - gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); - gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); - gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); - - o_vertices[bidx] = coords[index] * gBlendMat; - - LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; - LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; - - gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); - gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); - gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); - - gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); - gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); - gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); - - gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); - gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); - gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); - - o_normals[bidx] = normals[index] * gBlendRotMat; + /*else + { //weight didn't change + LLVector4a res; + gBlendMat.affineTransform(coord, res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gBlendMat.rotate(norm, res); + o_normals[bidx].setVec(res[0], res[1], res[2]); + }*/ } buffer->setBuffer(0); -- cgit v1.2.3 From bf5f215fbc29102cfd8b5418f29ea0ed6edd14ee Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 20 May 2010 02:46:01 -0500 Subject: Cleanup from review. --- indra/newview/llviewerjointmesh.cpp | 69 ++++++++++++++----------------------- 1 file changed, 26 insertions(+), 43 deletions(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index 294dfdcb55..236ad98d68 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -827,9 +827,9 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) //F32 last_weight = F32_MAX; LLMatrix4a gBlendMat; - __restrict const F32* weights = mMesh->getWeights(); - __restrict const LLVector4* coords = mMesh->getCoords(); - __restrict const LLVector4* normals = mMesh->getNormals(); + const F32* __restrict weights = mMesh->getWeights(); + const LLVector4a* __restrict coords = (LLVector4a*) mMesh->getCoords(); + const LLVector4a* __restrict normals = (LLVector4a*) mMesh->getNormals(); for (U32 index = 0; index < mMesh->getNumVertices(); index++) { @@ -838,54 +838,37 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) // blend by first matrix F32 w = weights[index]; - LLVector4a coord; - coord.load4a(coords[index].mV); + //LLVector4a coord; + //coord.load4a(coords[index].mV); - LLVector4a norm; - norm.load4a(normals[index].mV); + //LLVector4a norm; + //norm.load4a(normals[index].mV); - // Maybe we don't have to change gBlendMat. - // Profiles of a single-avatar scene on a Mac show this to be a very - // common case. JC - //if (w != last_weight) + S32 joint = llfloor(w); + w -= joint; + + if (w > 0.f) { - //last_weight = w; + // Try to keep all the accesses to the matrix data as close + // together as possible. This function is a hot spot on the + // Mac. JC + gBlendMat.setLerp(gJointMatAligned[joint+0], + gJointMatAligned[joint+1], w); - S32 joint = llfloor(w); - w -= joint; - - - if (w >= 0.f) - { - // Try to keep all the accesses to the matrix data as close - // together as possible. This function is a hot spot on the - // Mac. JC - gBlendMat.setLerp(gJointMatAligned[joint+0], - gJointMatAligned[joint+1], w); - - LLVector4a res; - gBlendMat.affineTransform(coord, res); - o_vertices[bidx].setVec(res[0], res[1], res[2]); - gBlendMat.rotate(norm, res); - o_normals[bidx].setVec(res[0], res[1], res[2]); - } - else - { // No lerp required in this case. - LLVector4a res; - gJointMatAligned[joint].affineTransform(coord, res); - o_vertices[bidx].setVec(res[0], res[1], res[2]); - gJointMatAligned[joint].rotate(norm, res); - o_normals[bidx].setVec(res[0], res[1], res[2]); - } + LLVector4a res; + gBlendMat.affineTransform(coords[index], res); + o_vertices[bidx].setVec(res[0], res[1], res[2]); + gBlendMat.rotate(normals[index], res); + o_normals[bidx].setVec(res[0], res[1], res[2]); } - /*else - { //weight didn't change + else + { // No lerp required in this case. LLVector4a res; - gBlendMat.affineTransform(coord, res); + gJointMatAligned[joint].affineTransform(coords[index], res); o_vertices[bidx].setVec(res[0], res[1], res[2]); - gBlendMat.rotate(norm, res); + gJointMatAligned[joint].rotate(normals[index], res); o_normals[bidx].setVec(res[0], res[1], res[2]); - }*/ + } } buffer->setBuffer(0); -- cgit v1.2.3 From 05a23f8dbaa45c64bcf6c55dd09a468ba2b1f144 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 21 May 2010 04:49:12 -0500 Subject: Vectorized memcpy. 16-byte aligned vertex buffers. (almost) fully vectorized avatar vertex buffer updating --- index buffers still need to be vectorized --- indra/newview/llviewerjointmesh.cpp | 169 +++++++++++++----------------------- 1 file changed, 61 insertions(+), 108 deletions(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index 236ad98d68..a7e7bfadd6 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -655,6 +655,9 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) //----------------------------------------------------------------------------- void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area) { + //bump num_vertices to next multiple of 4 + num_vertices = (num_vertices + 0x3) & ~0x3; + // Do a pre-alloc pass to determine sizes of data. if (mMesh && mValid) { @@ -677,6 +680,8 @@ static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face"); void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind, bool terse_update) { + //IF THIS FUNCTION BREAKS, SEE LLPOLYMESH CONSTRUCTOR AND CHECK ALIGNMENT OF INPUT ARRAYS + mFace = face; if (mFace->mVertexBuffer.isNull()) @@ -684,6 +689,16 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w return; } + LLDrawPool *poolp = mFace->getPool(); + BOOL hardware_skinning = (poolp && poolp->getVertexShaderLevel() > 0) ? TRUE : FALSE; + + if (!hardware_skinning && terse_update) + { //no need to do terse updates if we're doing software vertex skinning + // since mMesh is being copied into mVertexBuffer every frame + return; + } + + LLFastTimer t(FTM_AVATAR_FACE); LLStrider verticesp; @@ -696,108 +711,52 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w // Copy data into the faces from the polymesh data. if (mMesh && mValid) { - if (mMesh->getNumVertices()) + const U32 num_verts = mMesh->getNumVertices(); + + if (num_verts) { - stop_glerror(); face->getGeometryAvatar(verticesp, normalsp, tex_coordsp, vertex_weightsp, clothing_weightsp); - stop_glerror(); face->mVertexBuffer->getIndexStrider(indicesp); - stop_glerror(); verticesp += mMesh->mFaceVertexOffset; - tex_coordsp += mMesh->mFaceVertexOffset; normalsp += mMesh->mFaceVertexOffset; - vertex_weightsp += mMesh->mFaceVertexOffset; - clothing_weightsp += mMesh->mFaceVertexOffset; - - const U32* __restrict coords = (U32*) mMesh->getCoords(); - const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords(); - const U32* __restrict normals = (U32*) mMesh->getNormals(); - const U32* __restrict weights = (U32*) mMesh->getWeights(); - const U32* __restrict cloth_weights = (U32*) mMesh->getClothingWeights(); - - const U32 num_verts = mMesh->getNumVertices(); - - U32 i = 0; - - const U32 skip = verticesp.getSkip()/sizeof(U32); + + F32* v = (F32*) verticesp.get(); + F32* n = (F32*) normalsp.get(); + + U32 words = num_verts*4; - U32* __restrict v = (U32*) verticesp.get(); - U32* __restrict n = (U32*) normalsp.get(); + LLVector4a::memcpyNonAliased16(v, (F32*) mMesh->getCoords(), words); + LLVector4a::memcpyNonAliased16(n, (F32*) mMesh->getNormals(), words); + - if (terse_update) + if (!terse_update) { - for (S32 i = num_verts; i > 0; --i) - { - //morph target application only, only update positions and normals - v[0] = coords[0]; - v[1] = coords[1]; - v[2] = coords[2]; - coords += 4; - v += skip; - } + vertex_weightsp += mMesh->mFaceVertexOffset; + clothing_weightsp += mMesh->mFaceVertexOffset; + tex_coordsp += mMesh->mFaceVertexOffset; + + F32* tc = (F32*) tex_coordsp.get(); + F32* vw = (F32*) vertex_weightsp.get(); + F32* cw = (F32*) clothing_weightsp.get(); - for (S32 i = num_verts; i > 0; --i) - { - n[0] = normals[0]; - n[1] = normals[1]; - n[2] = normals[2]; - normals += 4; - n += skip; - } + LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2); + LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts); + LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4); } - else - { - - U32* __restrict tc = (U32*) tex_coordsp.get(); - U32* __restrict vw = (U32*) vertex_weightsp.get(); - U32* __restrict cw = (U32*) clothing_weightsp.get(); - - do - { - v[0] = coords[0]; - v[1] = coords[1]; - v[2] = coords[2]; - coords += 4; - v += skip; - - tc[0] = *(tex_coords++); - tc[1] = *(tex_coords++); - tc += skip; - - n[0] = normals[0]; - n[1] = normals[1]; - n[2] = normals[2]; - normals += 4; - n += skip; - - vw[0] = *(weights++); - vw += skip; - - cw[0] = *(cloth_weights++); - cw[1] = *(cloth_weights++); - cw[2] = *(cloth_weights++); - cw[3] = *(cloth_weights++); - cw += skip; - } - while (++i < num_verts); - - const U32 idx_count = mMesh->getNumFaces()*3; - indicesp += mMesh->mFaceIndexOffset; + const U32 idx_count = mMesh->getNumFaces()*3; - U16* __restrict idx = indicesp.get(); - S32* __restrict src_idx = (S32*) mMesh->getFaces(); + indicesp += mMesh->mFaceIndexOffset; - i = 0; + U16* __restrict idx = indicesp.get(); + S32* __restrict src_idx = (S32*) mMesh->getFaces(); - const S32 offset = (S32) mMesh->mFaceVertexOffset; + const S32 offset = (S32) mMesh->mFaceVertexOffset; - do - { - *(idx++) = *(src_idx++)+offset; - } - while (++i < idx_count); + for (S32 i = 0; i < idx_count; ++i) + { + *(idx++) = *(src_idx++)+offset; } } } @@ -824,50 +783,44 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) buffer->getVertexStrider(o_vertices, 0); buffer->getNormalStrider(o_normals, 0); - //F32 last_weight = F32_MAX; - LLMatrix4a gBlendMat; + F32* __restrict vert = o_vertices[0].mV; + F32* __restrict norm = o_normals[0].mV; const F32* __restrict weights = mMesh->getWeights(); const LLVector4a* __restrict coords = (LLVector4a*) mMesh->getCoords(); const LLVector4a* __restrict normals = (LLVector4a*) mMesh->getNormals(); + U32 offset = mMesh->mFaceVertexOffset*4; + vert += offset; + norm += offset; + for (U32 index = 0; index < mMesh->getNumVertices(); index++) { - U32 bidx = index + mMesh->mFaceVertexOffset; - - // blend by first matrix - F32 w = weights[index]; - - //LLVector4a coord; - //coord.load4a(coords[index].mV); + // equivalent to joint = floorf(weights[index]); + S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index)); + F32 w = weights[index] - joint; - //LLVector4a norm; - //norm.load4a(normals[index].mV); + LLMatrix4a gBlendMat; - S32 joint = llfloor(w); - w -= joint; - - if (w > 0.f) + if (w != 0.f) { - // Try to keep all the accesses to the matrix data as close - // together as possible. This function is a hot spot on the - // Mac. JC + // blend between matrices and apply gBlendMat.setLerp(gJointMatAligned[joint+0], gJointMatAligned[joint+1], w); LLVector4a res; gBlendMat.affineTransform(coords[index], res); - o_vertices[bidx].setVec(res[0], res[1], res[2]); + res.store4a(vert+index*4); gBlendMat.rotate(normals[index], res); - o_normals[bidx].setVec(res[0], res[1], res[2]); + res.store4a(norm+index*4); } else { // No lerp required in this case. LLVector4a res; gJointMatAligned[joint].affineTransform(coords[index], res); - o_vertices[bidx].setVec(res[0], res[1], res[2]); + res.store4a(vert+index*4); gJointMatAligned[joint].rotate(normals[index], res); - o_normals[bidx].setVec(res[0], res[1], res[2]); + res.store4a(norm+index*4); } } -- cgit v1.2.3 From 1ad56f84ef1102803986889cdd5b2a687adb557e Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 21 May 2010 14:33:05 -0500 Subject: Line endings fix. --- indra/newview/llviewerjointmesh.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/newview/llviewerjointmesh.cpp') diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index a7e7bfadd6..91605005e3 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -797,7 +797,7 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) for (U32 index = 0; index < mMesh->getNumVertices(); index++) { // equivalent to joint = floorf(weights[index]); - S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index)); + S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index)); F32 w = weights[index] - joint; LLMatrix4a gBlendMat; -- cgit v1.2.3