diff options
Diffstat (limited to 'indra/newview/llviewerjointmesh_sse.cpp')
-rw-r--r-- | indra/newview/llviewerjointmesh_sse.cpp | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/indra/newview/llviewerjointmesh_sse.cpp b/indra/newview/llviewerjointmesh_sse.cpp new file mode 100644 index 0000000000..c4f8ff4fa8 --- /dev/null +++ b/indra/newview/llviewerjointmesh_sse.cpp @@ -0,0 +1,94 @@ +/** + * @file llviewerjointmesh.cpp + * @brief LLV4 class implementation with LLViewerJointMesh class + * + * Copyright (c) 2007-$CurrentYear$, Linden Research, Inc. + * $License$ + */ + +//----------------------------------------------------------------------------- +// Header Files +//----------------------------------------------------------------------------- + +// Do not use precompiled headers, because we need to build this file with +// SSE support, but not the precompiled header file. JC +#include "linden_common.h" + +#include "llviewerjointmesh.h" + +// project includes +#include "llface.h" +#include "llpolymesh.h" + +// library includes +#include "lldarray.h" +#include "llv4math.h" // for LL_VECTORIZE +#include "llv4matrix3.h" +#include "llv4matrix4.h" +#include "v3math.h" + +// *NOTE: SSE must be enabled for this module + +#if LL_VECTORIZE + +static LLV4Matrix4 sJointMat[32]; + +inline void matrix_translate(LLV4Matrix4& m, const LLMatrix4* w, const LLVector3& j) +{ + m.mV[VX] = _mm_loadu_ps(w->mMatrix[VX]); + m.mV[VY] = _mm_loadu_ps(w->mMatrix[VY]); + m.mV[VZ] = _mm_loadu_ps(w->mMatrix[VZ]); + m.mV[VW] = _mm_loadu_ps(w->mMatrix[VW]); + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VX]), m.mV[VX])); // ( ax * vx ) + vw + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VY]), m.mV[VY])); + m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VZ]), m.mV[VZ])); +} + +// static +void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh) +{ + LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData; + + //upload joint pivots/matrices + for(S32 j = 0, jend = joint_data.count(); j < jend ; ++j ) + { + matrix_translate(sJointMat[j], joint_data[j]->mWorldMatrix, + joint_data[j]->mSkinJoint ? + joint_data[j]->mSkinJoint->mRootToJointSkinOffset + : joint_data[j+1]->mSkinJoint->mRootToParentJointSkinOffset); + } + + F32 weight = F32_MAX; + LLV4Matrix4 blend_mat; + + LLStrider<LLVector3> o_vertices; + LLStrider<LLVector3> o_normals; + + LLVertexBuffer *buffer = face->mVertexBuffer; + buffer->getVertexStrider(o_vertices, mesh->mFaceVertexOffset); + buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset); + + const F32* weights = mesh->getWeights(); + const LLVector3* coords = mesh->getCoords(); + const LLVector3* normals = mesh->getNormals(); + for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index) + { + if( weight != weights[index]) + { + S32 joint = llfloor(weight = weights[index]); + blend_mat.lerp(sJointMat[joint], sJointMat[joint+1], weight - joint); + } + blend_mat.multiply(coords[index], o_vertices[index]); + ((LLV4Matrix3)blend_mat).multiply(normals[index], o_normals[index]); + } +} + +#else + +void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh) +{ + LLViewerJointMesh::updateGeometryVectorized(face, mesh); + return; +} + +#endif |