diff options
author | James Cook <james@lindenlab.com> | 2007-07-02 23:52:40 +0000 |
---|---|---|
committer | James Cook <james@lindenlab.com> | 2007-07-02 23:52:40 +0000 |
commit | 1a33bc19b4ce94ab210749911dff14409b4454dd (patch) | |
tree | b674d97d37240a29c0a6671adfe950a506ef0ea4 /indra/newview | |
parent | e5124431b54d4342d4677371fccca5bc7250c079 (diff) |
svn merge -r 62595:62596 and 62598:63308 sse-skinning-3 for faster software avatar rendering. Visual Studio 2005 project file fixed pending.
Diffstat (limited to 'indra/newview')
-rw-r--r-- | indra/newview/lldrawable.h | 1 | ||||
-rw-r--r-- | indra/newview/llviewerjointmesh.cpp | 210 | ||||
-rw-r--r-- | indra/newview/llviewerjointmesh.h | 16 | ||||
-rw-r--r-- | indra/newview/llviewerjointmesh_sse.cpp | 94 | ||||
-rw-r--r-- | indra/newview/llviewerjointmesh_sse2.cpp | 96 | ||||
-rw-r--r-- | indra/newview/llviewerjointmesh_vec.cpp | 76 | ||||
-rw-r--r-- | indra/newview/llviewermenu.cpp | 3 |
7 files changed, 467 insertions, 29 deletions
diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h index 48c58dbb4c..328a116f59 100644 --- a/indra/newview/lldrawable.h +++ b/indra/newview/lldrawable.h @@ -26,6 +26,7 @@ #include "llviewerobject.h" #include "llrect.h" +class LLCamera; class LLDrawPool; class LLDrawable; class LLFace; diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index 150943465d..ba4c7e1b20 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -19,6 +19,7 @@ #include "llfasttimer.h" #include "llagent.h" +#include "llapr.h" #include "llbox.h" #include "lldrawable.h" #include "lldrawpoolavatar.h" @@ -29,12 +30,18 @@ #include "llglheaders.h" #include "lltexlayer.h" #include "llviewercamera.h" +#include "llviewercontrol.h" #include "llviewerimagelist.h" #include "llviewerjointmesh.h" #include "llvoavatar.h" #include "llsky.h" #include "pipeline.h" #include "llglslshader.h" +#include "llmath.h" +#include "v4math.h" +#include "m3math.h" +#include "m4math.h" + #if !LL_DARWIN && !LL_LINUX extern PFNGLWEIGHTPOINTERARBPROC glWeightPointerARB; @@ -48,6 +55,7 @@ static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL | LLVertexBuffer::MAP_TEXCOORD; + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // LLViewerJointMesh::LLSkinJoint @@ -100,6 +108,7 @@ BOOL LLSkinJoint::setupSkinJoint( LLViewerJoint *joint) return TRUE; } + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // LLViewerJointMesh @@ -394,9 +403,9 @@ const S32 NUM_AXES = 3; // rotation Z 0-n // pivot parent 0-n -- child = n+1 -static LLMatrix4 gJointMat[32]; -static LLMatrix3 gJointRot[32]; -static LLVector4 gJointPivot[32]; +static LLMatrix4 gJointMatUnaligned[32]; +static LLMatrix3 gJointRotUnaligned[32]; +static LLVector4 gJointPivot[32]; //----------------------------------------------------------------------------- // uploadJointMatrices() @@ -417,8 +426,8 @@ void LLViewerJointMesh::uploadJointMatrices() { joint_mat *= LLDrawPoolAvatar::getModelView(); } - gJointMat[joint_num] = joint_mat; - gJointRot[joint_num] = joint_mat.getMat3(); + gJointMatUnaligned[joint_num] = joint_mat; + gJointRotUnaligned[joint_num] = joint_mat.getMat3(); } BOOL last_pivot_uploaded = FALSE; @@ -455,8 +464,8 @@ void LLViewerJointMesh::uploadJointMatrices() { LLVector3 pivot; pivot = LLVector3(gJointPivot[i]); - pivot = pivot * gJointRot[i]; - gJointMat[i].translate(pivot); + pivot = pivot * gJointRotUnaligned[i]; + gJointMatUnaligned[i].translate(pivot); } // upload matrices @@ -467,11 +476,11 @@ void LLViewerJointMesh::uploadJointMatrices() for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.count(); joint_num++) { - gJointMat[joint_num].transpose(); + gJointMatUnaligned[joint_num].transpose(); for (S32 axis = 0; axis < NUM_AXES; axis++) { - F32* vector = gJointMat[joint_num].mMatrix[axis]; + F32* vector = gJointMatUnaligned[joint_num].mMatrix[axis]; //glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, LL_CHARACTER_MAX_JOINTS_PER_MESH * axis + joint_num+5, (GLfloat*)vector); U32 offset = LL_CHARACTER_MAX_JOINTS_PER_MESH*axis+joint_num; memcpy(mat+offset*4, vector, sizeof(GLfloat)*4); @@ -883,21 +892,9 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate) return (valid != activate); } - -void LLViewerJointMesh::updateGeometry() +// static +void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) { - if (!(mValid - && mMesh - && mFace - && mMesh->hasWeights() - && mFace->mVertexBuffer.notNull() - && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0)) - { - return; - } - - uploadJointMatrices(); - LLStrider<LLVector3> o_vertices; LLStrider<LLVector3> o_normals; @@ -938,9 +935,9 @@ void LLViewerJointMesh::updateGeometry() // No lerp required in this case. if (w == 1.0f) { - gBlendMat = gJointMat[joint+1]; + gBlendMat = gJointMatUnaligned[joint+1]; o_vertices[bidx] = coords[index] * gBlendMat; - gBlendRotMat = gJointRot[joint+1]; + gBlendRotMat = gJointRotUnaligned[joint+1]; o_normals[bidx] = normals[index] * gBlendRotMat; continue; } @@ -948,8 +945,8 @@ void LLViewerJointMesh::updateGeometry() // Try to keep all the accesses to the matrix data as close // together as possible. This function is a hot spot on the // Mac. JC - LLMatrix4 &m0 = gJointMat[joint+1]; - LLMatrix4 &m1 = gJointMat[joint+0]; + LLMatrix4 &m0 = gJointMatUnaligned[joint+1]; + LLMatrix4 &m1 = gJointMatUnaligned[joint+0]; gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); @@ -969,8 +966,8 @@ void LLViewerJointMesh::updateGeometry() o_vertices[bidx] = coords[index] * gBlendMat; - LLMatrix3 &n0 = gJointRot[joint+1]; - LLMatrix3 &n1 = gJointRot[joint+0]; + LLMatrix3 &n0 = gJointRotUnaligned[joint+1]; + LLMatrix3 &n1 = gJointRotUnaligned[joint+0]; gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); @@ -988,6 +985,161 @@ void LLViewerJointMesh::updateGeometry() } } +const U32 UPDATE_GEOMETRY_CALL_MASK = 0x1FFF; // 8K samples before overflow +const U32 UPDATE_GEOMETRY_CALL_OVERFLOW = ~UPDATE_GEOMETRY_CALL_MASK; +static bool sUpdateGeometryCallPointer = false; +static F64 sUpdateGeometryGlobalTime = 0.0 ; +static F64 sUpdateGeometryElapsedTime = 0.0 ; +static F64 sUpdateGeometryElapsedTimeOff = 0.0 ; +static F64 sUpdateGeometryElapsedTimeOn = 0.0 ; +static F64 sUpdateGeometryRunAvgOff[10]; +static F64 sUpdateGeometryRunAvgOn[10]; +static U32 sUpdateGeometryRunCount = 0 ; +static U32 sUpdateGeometryCalls = 0 ; +static U32 sUpdateGeometryLastProcessor = 0 ; +void (*LLViewerJointMesh::sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh); + +void LLViewerJointMesh::updateGeometry() +{ + extern BOOL gVectorizePerfTest; + extern U32 gVectorizeProcessor; + + if (!(mValid + && mMesh + && mFace + && mMesh->hasWeights() + && mFace->mVertexBuffer.notNull() + && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0)) + { + return; + } + + if (!gVectorizePerfTest) + { + // Once we've measured performance, just run the specified + // code version. + if(sUpdateGeometryFunc == updateGeometryOriginal) + uploadJointMatrices(); + sUpdateGeometryFunc(mFace, mMesh); + } + else + { + // At startup, measure the amount of time in skinning and choose + // the fastest one. + LLTimer ug_timer ; + + if (sUpdateGeometryCallPointer) + { + if(sUpdateGeometryFunc == updateGeometryOriginal) + uploadJointMatrices(); + // call accelerated version for this processor + sUpdateGeometryFunc(mFace, mMesh); + } + else + { + uploadJointMatrices(); + updateGeometryOriginal(mFace, mMesh); + } + + sUpdateGeometryElapsedTime += ug_timer.getElapsedTimeF64(); + ++sUpdateGeometryCalls; + if(0 != (sUpdateGeometryCalls & UPDATE_GEOMETRY_CALL_OVERFLOW)) + { + F64 time_since_app_start = ug_timer.getElapsedSeconds(); + if(sUpdateGeometryGlobalTime == 0.0 + || sUpdateGeometryLastProcessor != gVectorizeProcessor) + { + sUpdateGeometryGlobalTime = time_since_app_start; + sUpdateGeometryElapsedTime = 0; + sUpdateGeometryCalls = 0; + sUpdateGeometryRunCount = 0; + sUpdateGeometryLastProcessor = gVectorizeProcessor; + sUpdateGeometryCallPointer = false; + return; + } + F64 percent_time_in_function = + ( sUpdateGeometryElapsedTime * 100.0 ) / ( time_since_app_start - sUpdateGeometryGlobalTime ) ; + sUpdateGeometryGlobalTime = time_since_app_start; + if (!sUpdateGeometryCallPointer) + { + // First set of run data is with vectorization off. + sUpdateGeometryCallPointer = true; + llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " + << "vectorize off " << percent_time_in_function + << "% of time with " + << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) + << " seconds per call " + << llendl; + sUpdateGeometryRunAvgOff[sUpdateGeometryRunCount] = percent_time_in_function; + sUpdateGeometryElapsedTimeOff += sUpdateGeometryElapsedTime; + sUpdateGeometryCalls = 0; + } + else + { + // Second set of run data is with vectorization on. + sUpdateGeometryCallPointer = false; + llinfos << "profile (avg of " << sUpdateGeometryCalls << " samples) = " + << "VEC on " << percent_time_in_function + << "% of time with " + << (sUpdateGeometryElapsedTime / (F64)sUpdateGeometryCalls) + << " seconds per call " + << llendl; + sUpdateGeometryRunAvgOn[sUpdateGeometryRunCount] = percent_time_in_function ; + sUpdateGeometryElapsedTimeOn += sUpdateGeometryElapsedTime; + + sUpdateGeometryCalls = 0; + sUpdateGeometryRunCount++; + F64 a = 0.0, b = 0.0; + for(U32 i = 0; i<sUpdateGeometryRunCount; i++) + { + a += sUpdateGeometryRunAvgOff[i]; + b += sUpdateGeometryRunAvgOn[i]; + } + a /= sUpdateGeometryRunCount; + b /= sUpdateGeometryRunCount; + F64 perf_boost = ( sUpdateGeometryElapsedTimeOff - sUpdateGeometryElapsedTimeOn ) / sUpdateGeometryElapsedTimeOn; + llinfos << "run averages (" << (F64)sUpdateGeometryRunCount + << "/10) vectorize off " << a + << "% : vectorize type " << gVectorizeProcessor + << " " << b + << "% : performance boost " + << perf_boost * 100.0 + << "%" + << llendl ; + if(sUpdateGeometryRunCount == 10) + { + // In case user runs test again, force reset of data on + // next run. + sUpdateGeometryGlobalTime = 0.0; + + // We have data now on which version is faster. Switch to that + // code and save the data for next run. + gVectorizePerfTest = FALSE; + gSavedSettings.setBOOL("VectorizePerfTest", FALSE); + + if (perf_boost > 0.0) + { + llinfos << "Vectorization improves avatar skinning performance, " + << "keeping on for future runs." + << llendl; + gSavedSettings.setBOOL("VectorizeSkin", TRUE); + } + else + { + // SIMD decreases performance, fall back to original code + llinfos << "Vectorization decreases avatar skinning performance, " + << "switching back to original code." + << llendl; + + gSavedSettings.setBOOL("VectorizeSkin", FALSE); + } + } + } + sUpdateGeometryElapsedTime = 0.0f; + } + } +} + void LLViewerJointMesh::dump() { if (mValid) diff --git a/indra/newview/llviewerjointmesh.h b/indra/newview/llviewerjointmesh.h index b6fd8afcdb..992c3656a1 100644 --- a/indra/newview/llviewerjointmesh.h +++ b/indra/newview/llviewerjointmesh.h @@ -126,6 +126,22 @@ public: /*virtual*/ BOOL isAnimatable() { return FALSE; } void writeCAL3D(apr_file_t* fp, S32 material_num, LLCharacter* characterp); + + // Avatar vertex skinning is a significant performance issue on computers + // with avatar vertex programs turned off (for example, most Macs). We + // therefore have custom versions that use SIMD instructions. + // + // These functions require compiler options for SSE2, SSE, or neither, and + // hence are contained in separate individual .cpp files. JC + static void updateGeometryOriginal(LLFace* face, LLPolyMesh* mesh); + // generic vector code, used for Altivec + static void updateGeometryVectorized(LLFace* face, LLPolyMesh* mesh); + static void updateGeometrySSE(LLFace* face, LLPolyMesh* mesh); + static void updateGeometrySSE2(LLFace* face, LLPolyMesh* mesh); + + // Use a fuction pointer to indicate which version we are running. + static void (*sUpdateGeometryFunc)(LLFace* face, LLPolyMesh* mesh); + private: // Allocate skin data BOOL allocateSkinData( U32 numSkinJoints ); diff --git a/indra/newview/llviewerjointmesh_sse.cpp b/indra/newview/llviewerjointmesh_sse.cpp new file mode 100644 index 0000000000..c4f8ff4fa8 --- /dev/null +++ b/indra/newview/llviewerjointmesh_sse.cpp @@ -0,0 +1,94 @@ +/** + * @file llviewerjointmesh.cpp + * @brief LLV4 class implementation with LLViewerJointMesh class + * + * Copyright (c) 2007-$CurrentYear$, Linden Research, Inc. + * $License$ + */ + +//----------------------------------------------------------------------------- +// Header Files +//----------------------------------------------------------------------------- + +// Do not use precompiled headers, because we need to build this file with +// SSE support, but not the precompiled header file. JC +#include "linden_common.h" + +#include "llviewerjointmesh.h" + +// project includes +#include "llface.h" +#include "llpolymesh.h" + +// library includes +#include "lldarray.h" +#include "llv4math.h" // for LL_VECTORIZE +#include "llv4matrix3.h" +#include "llv4matrix4.h" +#include "v3math.h" + +// *NOTE: SSE must be enabled for this module + +#if LL_VECTORIZE + +static LLV4Matrix4 sJointMat[32]; + +inline void matrix_translate(LLV4Matrix4& m, const LLMatrix4* w, const LLVector3& j) +{ + m.mV[VX] = _mm_loadu_ps(w->mMatrix[VX]); + m.mV[VY] = _mm_loadu_ps(w->mMatrix[VY]); + m.mV[VZ] = _mm_loadu_ps(w->mMatrix[VZ]); + m.mV[VW] = _mm_loadu_ps(w->mMatrix[VW]); + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VX]), m.mV[VX])); // ( ax * vx ) + vw + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VY]), m.mV[VY])); + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VZ]), m.mV[VZ])); +} + +// static +void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh) +{ + LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData; + + //upload joint pivots/matrices + for(S32 j = 0, jend = joint_data.count(); j < jend ; ++j ) + { + matrix_translate(sJointMat[j], joint_data[j]->mWorldMatrix, + joint_data[j]->mSkinJoint ? + joint_data[j]->mSkinJoint->mRootToJointSkinOffset + : joint_data[j+1]->mSkinJoint->mRootToParentJointSkinOffset); + } + + F32 weight = F32_MAX; + LLV4Matrix4 blend_mat; + + LLStrider<LLVector3> o_vertices; + LLStrider<LLVector3> o_normals; + + LLVertexBuffer *buffer = face->mVertexBuffer; + buffer->getVertexStrider(o_vertices, mesh->mFaceVertexOffset); + buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset); + + const F32* weights = mesh->getWeights(); + const LLVector3* coords = mesh->getCoords(); + const LLVector3* normals = mesh->getNormals(); + for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index) + { + if( weight != weights[index]) + { + S32 joint = llfloor(weight = weights[index]); + blend_mat.lerp(sJointMat[joint], sJointMat[joint+1], weight - joint); + } + blend_mat.multiply(coords[index], o_vertices[index]); + ((LLV4Matrix3)blend_mat).multiply(normals[index], o_normals[index]); + } +} + +#else + +void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh) +{ + LLViewerJointMesh::updateGeometryVectorized(face, mesh); + return; +} + +#endif diff --git a/indra/newview/llviewerjointmesh_sse2.cpp b/indra/newview/llviewerjointmesh_sse2.cpp new file mode 100644 index 0000000000..cae602ac14 --- /dev/null +++ b/indra/newview/llviewerjointmesh_sse2.cpp @@ -0,0 +1,96 @@ +/** + * @file llviewerjointmesh.cpp + * @brief LLV4 class implementation with LLViewerJointMesh class + * + * Copyright (c) 2007-$CurrentYear$, Linden Research, Inc. + * $License$ + */ + +//----------------------------------------------------------------------------- +// Header Files +//----------------------------------------------------------------------------- + +// Do not use precompiled headers, because we need to build this file with +// SSE support, but not the precompiled header file. JC +#include "linden_common.h" + +#include "llviewerjointmesh.h" + +// project includes +#include "llface.h" +#include "llpolymesh.h" + +// library includes +#include "lldarray.h" +#include "llstrider.h" +#include "llv4math.h" // for LL_VECTORIZE +#include "llv4matrix3.h" +#include "llv4matrix4.h" +#include "m4math.h" +#include "v3math.h" + +// *NOTE: SSE2 must be enabled for this module + +#if LL_VECTORIZE + +static LLV4Matrix4 sJointMat[32]; + +inline void matrix_translate(LLV4Matrix4& m, const LLMatrix4* w, const LLVector3& j) +{ + m.mV[VX] = _mm_loadu_ps(w->mMatrix[VX]); + m.mV[VY] = _mm_loadu_ps(w->mMatrix[VY]); + m.mV[VZ] = _mm_loadu_ps(w->mMatrix[VZ]); + m.mV[VW] = _mm_loadu_ps(w->mMatrix[VW]); + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VX]), m.mV[VX])); // ( ax * vx ) + vw + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VY]), m.mV[VY])); + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VZ]), m.mV[VZ])); +} + +// static +void LLViewerJointMesh::updateGeometrySSE2(LLFace *face, LLPolyMesh *mesh) +{ + LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData; + + //upload joint pivots/matrices + for(S32 j = 0, jend = joint_data.count(); j < jend ; ++j ) + { + matrix_translate(sJointMat[j], joint_data[j]->mWorldMatrix, + joint_data[j]->mSkinJoint ? + joint_data[j]->mSkinJoint->mRootToJointSkinOffset + : joint_data[j+1]->mSkinJoint->mRootToParentJointSkinOffset); + } + + F32 weight = F32_MAX; + LLV4Matrix4 blend_mat; + + LLStrider<LLVector3> o_vertices; + LLStrider<LLVector3> o_normals; + + LLVertexBuffer *buffer = face->mVertexBuffer; + buffer->getVertexStrider(o_vertices, mesh->mFaceVertexOffset); + buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset); + + const F32* weights = mesh->getWeights(); + const LLVector3* coords = mesh->getCoords(); + const LLVector3* normals = mesh->getNormals(); + for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index) + { + if( weight != weights[index]) + { + S32 joint = llfloor(weight = weights[index]); + blend_mat.lerp(sJointMat[joint], sJointMat[joint+1], weight - joint); + } + blend_mat.multiply(coords[index], o_vertices[index]); + ((LLV4Matrix3)blend_mat).multiply(normals[index], o_normals[index]); + } +} + +#else + +void LLViewerJointMesh::updateGeometrySSE2(LLFace *face, LLPolyMesh *mesh) +{ + LLViewerJointMesh::updateGeometryVectorized(face, mesh); + return; +} + +#endif diff --git a/indra/newview/llviewerjointmesh_vec.cpp b/indra/newview/llviewerjointmesh_vec.cpp new file mode 100644 index 0000000000..5b1e080435 --- /dev/null +++ b/indra/newview/llviewerjointmesh_vec.cpp @@ -0,0 +1,76 @@ +/** + * @file llviewerjointmesh.cpp + * @brief LLV4 math class implementation with LLViewerJointMesh class + * + * Copyright (c) 2001-$CurrentYear$, Linden Research, Inc. + * $License$ + */ + +//----------------------------------------------------------------------------- +// Header Files +//----------------------------------------------------------------------------- +#include "llviewerprecompiledheaders.h" + +#include "llviewerjointmesh.h" + +#include "llface.h" +#include "llpolymesh.h" +#include "llv4math.h" +#include "llv4matrix3.h" +#include "llv4matrix4.h" + +// *NOTE: SSE must be disabled for this module + +#if LL_VECTORIZE +#error This module requires vectorization (i.e. SSE) mode to be disabled. +#endif + +static LLV4Matrix4 sJointMat[32]; + +// static +void LLViewerJointMesh::updateGeometryVectorized(LLFace *face, LLPolyMesh *mesh) +{ + LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData; + S32 j, joint_num, joint_end = joint_data.count(); + LLV4Vector3 pivot; + + //upload joint pivots/matrices + for(j = joint_num = 0; joint_num < joint_end ; ++joint_num ) + { + LLSkinJoint *sj; + const LLMatrix4 * wm = joint_data[joint_num]->mWorldMatrix; + if (NULL == (sj = joint_data[joint_num]->mSkinJoint)) + { + sj = joint_data[++joint_num]->mSkinJoint; + ((LLV4Matrix3)(sJointMat[j] = *wm)).multiply(sj->mRootToParentJointSkinOffset, pivot); + sJointMat[j++].translate(pivot); + wm = joint_data[joint_num]->mWorldMatrix; + } + ((LLV4Matrix3)(sJointMat[j] = *wm)).multiply(sj->mRootToJointSkinOffset, pivot); + sJointMat[j++].translate(pivot); + } + + F32 weight = F32_MAX; + LLV4Matrix4 blend_mat; + + LLStrider<LLVector3> o_vertices; + LLStrider<LLVector3> o_normals; + + LLVertexBuffer *buffer = face->mVertexBuffer; + buffer->getVertexStrider(o_vertices, mesh->mFaceVertexOffset); + buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset); + + const F32* weights = mesh->getWeights(); + const LLVector3* coords = mesh->getCoords(); + const LLVector3* normals = mesh->getNormals(); + for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index) + { + if( weight != weights[index]) + { + S32 joint = llfloor(weight = weights[index]); + blend_mat.lerp(sJointMat[joint], sJointMat[joint+1], weight - joint); + } + blend_mat.multiply(coords[index], o_vertices[index]); + ((LLV4Matrix3)blend_mat).multiply(normals[index], o_normals[index]); + } +} diff --git a/indra/newview/llviewermenu.cpp b/indra/newview/llviewermenu.cpp index d18859e356..7ad4f1d70b 100644 --- a/indra/newview/llviewermenu.cpp +++ b/indra/newview/llviewermenu.cpp @@ -960,6 +960,7 @@ extern BOOL gDebugClicks; extern BOOL gDebugWindowProc; extern BOOL gDebugTextEditorTips; extern BOOL gDebugSelectMgr; +extern BOOL gVectorizePerfTest; void init_debug_ui_menu(LLMenuGL* menu) { @@ -1169,6 +1170,8 @@ void init_debug_rendering_menu(LLMenuGL* menu) (void*)"ShowDepthBuffer")); sub_menu->append(new LLMenuItemToggleGL("Show Select Buffer", &gDebugSelect)); + sub_menu->append(new LLMenuItemToggleGL("Vectorize Perf Test", &gVectorizePerfTest)); + sub_menu = new LLMenuGL("Render Tests"); sub_menu->append(new LLMenuItemCheckGL("Camera Offset", |