summaryrefslogtreecommitdiff
path: root/indra/newview/llviewerjointmesh_sse.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/newview/llviewerjointmesh_sse.cpp')
-rw-r--r--indra/newview/llviewerjointmesh_sse.cpp94
1 files changed, 94 insertions, 0 deletions
diff --git a/indra/newview/llviewerjointmesh_sse.cpp b/indra/newview/llviewerjointmesh_sse.cpp
new file mode 100644
index 0000000000..c4f8ff4fa8
--- /dev/null
+++ b/indra/newview/llviewerjointmesh_sse.cpp
@@ -0,0 +1,94 @@
+/**
+ * @file llviewerjointmesh_sse.cpp
+ * @brief SSE implementation of LLViewerJointMesh::updateGeometrySSE, built on the LLV4 math classes
+ *
+ * Copyright (c) 2007-$CurrentYear$, Linden Research, Inc.
+ * $License$
+ */
+
+//-----------------------------------------------------------------------------
+// Header Files
+//-----------------------------------------------------------------------------
+
+// Do not use precompiled headers, because we need to build this file with
+// SSE support, but not the precompiled header file. JC
+#include "linden_common.h"
+
+#include "llviewerjointmesh.h"
+
+// project includes
+#include "llface.h"
+#include "llpolymesh.h"
+
+// library includes
+#include "lldarray.h"
+#include "llv4math.h" // for LL_VECTORIZE
+#include "llv4matrix3.h"
+#include "llv4matrix4.h"
+#include "v3math.h"
+
+// *NOTE: SSE must be enabled for this module
+
+#if LL_VECTORIZE
+
+// Per-joint matrices used by updateGeometrySSE: the function first fills one
+// entry per joint render entry, then reads pairs of adjacent entries while
+// blending vertices. The storage is file-local and has a fixed capacity of 32.
+// NOTE(review): presumably 32 is at least the largest joint count a mesh can
+// reference -- confirm against the mesh data.
+static LLV4Matrix4 sJointMat[32];
+
+/**
+ * Copies the four rows of *w into m and folds a translation by j into row W:
+ *
+ *   m.W = w.W + j.x * w.X + j.y * w.Y + j.z * w.Z
+ *
+ * Rows X, Y and Z of m are plain copies of the corresponding rows of *w.
+ *
+ * @param m  destination matrix; all four rows are overwritten
+ * @param w  source matrix; its rows are read with unaligned loads
+ * @param j  vector whose components scale rows X, Y and Z before they are
+ *           added into row W
+ */
+inline void matrix_translate(LLV4Matrix4& m, const LLMatrix4* w, const LLVector3& j)
+{
+    // _mm_loadu_ps is used so the source rows need not be 16-byte aligned.
+    const __m128 row_x = _mm_loadu_ps(w->mMatrix[VX]);
+    const __m128 row_y = _mm_loadu_ps(w->mMatrix[VY]);
+    const __m128 row_z = _mm_loadu_ps(w->mMatrix[VZ]);
+    __m128 row_w = _mm_loadu_ps(w->mMatrix[VW]);
+
+    // Accumulate strictly in X, Y, Z order; floating-point addition is not
+    // associative, so the order determines the rounded result.
+    row_w = _mm_add_ps(row_w, _mm_mul_ps(_mm_set1_ps(j.mV[VX]), row_x));
+    row_w = _mm_add_ps(row_w, _mm_mul_ps(_mm_set1_ps(j.mV[VY]), row_y));
+    row_w = _mm_add_ps(row_w, _mm_mul_ps(_mm_set1_ps(j.mV[VZ]), row_z));
+
+    m.mV[VX] = row_x;
+    m.mV[VY] = row_y;
+    m.mV[VZ] = row_z;
+    m.mV[VW] = row_w;
+}
+
+/**
+ * Skins the vertices and normals of mesh into the vertex buffer of face.
+ *
+ * Pass 1 fills sJointMat with one matrix per joint render entry of the
+ * reference mesh (world matrix combined with a skin offset via
+ * matrix_translate).
+ *
+ * Pass 2 walks every vertex. The integer part of a vertex weight selects a
+ * joint matrix, and the fractional part linearly blends it with the next
+ * joint's matrix. The blended matrix transforms the coordinate; its
+ * LLV4Matrix3 conversion transforms the normal.
+ *
+ * @param face  face whose mVertexBuffer receives the transformed data
+ * @param mesh  supplies weights, coordinates, normals, the reference mesh's
+ *              joint render data and the face vertex offset
+ */
+// static
+void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh)
+{
+    LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData;
+
+    // sJointMat is a fixed-size array. Clamp the number of uploaded joints to
+    // its capacity so an unexpectedly long joint list cannot write past the end.
+    const S32 joint_capacity = static_cast<S32>(sizeof(sJointMat) / sizeof(sJointMat[0]));
+    const S32 joint_count = joint_data.count();
+    const S32 jend = (joint_count < joint_capacity) ? joint_count : joint_capacity;
+
+    //upload joint pivots/matrices
+    for (S32 j = 0; j < jend; ++j)
+    {
+        // An entry without its own skin joint borrows the parent offset of the
+        // following entry.
+        // NOTE(review): this presumes such an entry is never the last one and
+        // that the following entry has a skin joint -- confirm with the mesh data.
+        matrix_translate(sJointMat[j], joint_data[j]->mWorldMatrix,
+            joint_data[j]->mSkinJoint ?
+                joint_data[j]->mSkinJoint->mRootToJointSkinOffset
+                : joint_data[j+1]->mSkinJoint->mRootToParentJointSkinOffset);
+    }
+
+    // Sentinel start value so the first vertex triggers a blend computation
+    // (assuming no real weight equals F32_MAX).
+    F32 weight = F32_MAX;
+    LLV4Matrix4 blend_mat;
+
+    LLStrider<LLVector3> o_vertices;
+    LLStrider<LLVector3> o_normals;
+
+    LLVertexBuffer *buffer = face->mVertexBuffer;
+    buffer->getVertexStrider(o_vertices, mesh->mFaceVertexOffset);
+    buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset);
+
+    const F32* weights = mesh->getWeights();
+    const LLVector3* coords = mesh->getCoords();
+    const LLVector3* normals = mesh->getNormals();
+    for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index)
+    {
+        // The blend matrix is rebuilt only when the weight differs from the
+        // previous vertex's weight.
+        if (weight != weights[index])
+        {
+            weight = weights[index];
+            const S32 joint = llfloor(weight);
+            // NOTE(review): reads sJointMat[joint] and sJointMat[joint+1];
+            // presumes weights always stay below the last uploaded joint -- confirm.
+            blend_mat.lerp(sJointMat[joint], sJointMat[joint+1], weight - joint);
+        }
+        blend_mat.multiply(coords[index], o_vertices[index]);
+        static_cast<LLV4Matrix3>(blend_mat).multiply(normals[index], o_normals[index]);
+    }
+}
+
+#else
+
+/**
+ * Variant compiled when LL_VECTORIZE is not set: hands the work to
+ * updateGeometryVectorized with the same arguments.
+ *
+ * @param face  forwarded unchanged
+ * @param mesh  forwarded unchanged
+ */
+void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh)
+{
+    LLViewerJointMesh::updateGeometryVectorized(face, mesh);
+}
+
+#endif